Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/missing.py: 15%
1"""
2Routines for filling missing data.
3"""
4from __future__ import annotations
6from functools import (
7 partial,
8 wraps,
9)
10from typing import (
11 TYPE_CHECKING,
12 Any,
13 cast,
14)
16import numpy as np
18from pandas._libs import (
19 NaT,
20 algos,
21 lib,
22)
23from pandas._typing import (
24 ArrayLike,
25 Axis,
26 AxisInt,
27 F,
28 npt,
29)
30from pandas.compat._optional import import_optional_dependency
32from pandas.core.dtypes.cast import infer_dtype_from
33from pandas.core.dtypes.common import (
34 is_array_like,
35 is_numeric_v_string_like,
36 is_object_dtype,
37 needs_i8_conversion,
38)
39from pandas.core.dtypes.missing import (
40 is_valid_na_for_dtype,
41 isna,
42 na_value_for_dtype,
43)
45if TYPE_CHECKING:
46 from pandas import Index
49def check_value_size(value, mask: npt.NDArray[np.bool_], length: int):
50 """
51 Validate the size of the values passed to ExtensionArray.fillna.
52 """
53 if is_array_like(value):
54 if len(value) != length:
55 raise ValueError(
56 f"Length of 'value' does not match. Got ({len(value)}) "
57 f"expected {length}"
58 )
59 value = value[mask]
61 return value
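# Illustrative sketch (not part of pandas): when an array-like 'value' is
# handed to ExtensionArray.fillna, check_value_size keeps only the entries at
# the missing positions. The example function name is hypothetical.
def _check_value_size_example() -> np.ndarray:
    value = np.array([10, 20, 30])
    mask = np.array([True, False, True])  # positions that are missing
    # Length matches, so the values at masked positions are returned: [10, 30]
    return check_value_size(value, mask, length=3)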
64def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
65 """
66 Return a masking array of the same size/shape as arr,
67 with entries equaling any member of values_to_mask set to True.
69 Parameters
70 ----------
71 arr : ArrayLike
72 values_to_mask: list, tuple, or scalar
74 Returns
75 -------
76 np.ndarray[bool]
77 """
78 # When called from Block.replace/replace_list, values_to_mask is a scalar
79 # known to be holdable by arr.
80 # When called from Series._single_replace, values_to_mask is tuple or list
81 dtype, values_to_mask = infer_dtype_from(values_to_mask)
82 # error: Argument "dtype" to "array" has incompatible type "Union[dtype[Any],
83 # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
84 # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
85 # _DTypeDict, Tuple[Any, Any]]]"
86 values_to_mask = np.array(values_to_mask, dtype=dtype) # type: ignore[arg-type]
88 potential_na = False
89 if is_object_dtype(arr):
90 # pre-compute mask to avoid comparison to NA
91 potential_na = True
92 arr_mask = ~isna(arr)
94 na_mask = isna(values_to_mask)
95 nonna = values_to_mask[~na_mask]
97 # GH 21977
98 mask = np.zeros(arr.shape, dtype=bool)
99 for x in nonna:
100 if is_numeric_v_string_like(arr, x):
101 # GH#29553 prevent numpy deprecation warnings
102 pass
103 else:
104 if potential_na:
105 new_mask = np.zeros(arr.shape, dtype=np.bool_)
106 new_mask[arr_mask] = arr[arr_mask] == x
107 else:
108 new_mask = arr == x
110 if not isinstance(new_mask, np.ndarray):
111 # usually BooleanArray
112 new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
113 mask |= new_mask
115 if na_mask.any():
116 mask |= isna(arr)
118 return mask
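# Illustrative sketch (not part of pandas): how mask_missing behaves for a
# mixed object array, as used by the replace machinery. The example function
# name is hypothetical.
def _mask_missing_example() -> npt.NDArray[np.bool_]:
    arr = np.array(["a", 1.5, np.nan, "b"], dtype=object)
    # Entries equal to "a" are matched directly; the NaN in values_to_mask is
    # routed through isna(arr), and numeric-vs-string comparisons are skipped.
    return mask_missing(arr, ["a", np.nan])  # -> [True, False, True, False]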
121def clean_fill_method(method: str | None, allow_nearest: bool = False):
122 # asfreq is compat for resampling
123 if method in [None, "asfreq"]:
124 return None
126 if isinstance(method, str):
127 method = method.lower()
128 if method == "ffill":
129 method = "pad"
130 elif method == "bfill":
131 method = "backfill"
133 valid_methods = ["pad", "backfill"]
134 expecting = "pad (ffill) or backfill (bfill)"
135 if allow_nearest:
136 valid_methods.append("nearest")
137 expecting = "pad (ffill), backfill (bfill) or nearest"
138 if method not in valid_methods:
139 raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}")
140 return method
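# Illustrative sketch (not part of pandas): clean_fill_method normalizes the
# user-facing aliases to the internal method names; the assertions only
# document the expected behaviour.
def _clean_fill_method_example() -> None:
    assert clean_fill_method("ffill") == "pad"
    assert clean_fill_method("BFILL") == "backfill"
    assert clean_fill_method(None) is None
    # "nearest" is accepted only when the caller opts in (reindexing).
    assert clean_fill_method("nearest", allow_nearest=True) == "nearest"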
143# interpolation methods that dispatch to np.interp
145NP_METHODS = ["linear", "time", "index", "values"]
147# interpolation methods that dispatch to _interpolate_scipy_wrapper
149SP_METHODS = [
150 "nearest",
151 "zero",
152 "slinear",
153 "quadratic",
154 "cubic",
155 "barycentric",
156 "krogh",
157 "spline",
158 "polynomial",
159 "from_derivatives",
160 "piecewise_polynomial",
161 "pchip",
162 "akima",
163 "cubicspline",
164]
167def clean_interp_method(method: str, index: Index, **kwargs) -> str:
168 order = kwargs.get("order")
170 if method in ("spline", "polynomial") and order is None:
171 raise ValueError("You must specify the order of the spline or polynomial.")
173 valid = NP_METHODS + SP_METHODS
174 if method not in valid:
175 raise ValueError(f"method must be one of {valid}. Got '{method}' instead.")
177 if method in ("krogh", "piecewise_polynomial", "pchip"):
178 if not index.is_monotonic_increasing:
179 raise ValueError(
180 f"{method} interpolation requires that the index be monotonic."
181 )
183 return method
186def find_valid_index(
187 values, *, how: str, is_valid: npt.NDArray[np.bool_]
188) -> int | None:
189 """
190 Retrieves the positional index of the first or last valid value.
192 Parameters
193 ----------
194 values : ndarray or ExtensionArray
195 how : {'first', 'last'}
196 Use this parameter to change between the first or last valid index.
197 is_valid: np.ndarray
198 Boolean mask of valid (non-NA) entries.
200 Returns
201 -------
202 int or None
203 """
204 assert how in ["first", "last"]
206 if len(values) == 0: # early stop
207 return None
209 if values.ndim == 2:
210 is_valid = is_valid.any(axis=1) # reduce axis 1
212 if how == "first":
213 idxpos = is_valid[::].argmax()
215 elif how == "last":
216 idxpos = len(values) - 1 - is_valid[::-1].argmax()
218 chk_notna = is_valid[idxpos]
220 if not chk_notna:
221 return None
222 # Incompatible return value type (got "signedinteger[Any]",
223 # expected "Optional[int]")
224 return idxpos # type: ignore[return-value]
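# Illustrative sketch (not part of pandas): find_valid_index returns a
# positional index, or None when no valid entry exists. The example function
# name is hypothetical.
def _find_valid_index_example() -> None:
    values = np.array([np.nan, np.nan, 1.0, 2.0, np.nan])
    is_valid = ~isna(values)
    assert find_valid_index(values, how="first", is_valid=is_valid) == 2
    assert find_valid_index(values, how="last", is_valid=is_valid) == 3
    all_na = np.array([np.nan, np.nan])
    assert find_valid_index(all_na, how="first", is_valid=~isna(all_na)) is None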
227def interpolate_array_2d(
228 data: np.ndarray,
229 method: str = "pad",
230 axis: AxisInt = 0,
231 index: Index | None = None,
232 limit: int | None = None,
233 limit_direction: str = "forward",
234 limit_area: str | None = None,
235 fill_value: Any | None = None,
236 coerce: bool = False,
237 downcast: str | None = None,
238 **kwargs,
239) -> None:
240 """
241 Wrapper to dispatch to either interpolate_2d or _interpolate_2d_with_fill.
243 Notes
244 -----
245 Alters 'data' in-place.
246 """
247 try:
248 m = clean_fill_method(method)
249 except ValueError:
250 m = None
252 if m is not None:
253 if fill_value is not None:
254 # similar to validate_fillna_kwargs
255 raise ValueError("Cannot pass both fill_value and method")
257 interpolate_2d(
258 data,
259 method=m,
260 axis=axis,
261 limit=limit,
262 limit_area=limit_area,
263 )
264 else:
265 assert index is not None # for mypy
267 _interpolate_2d_with_fill(
268 data=data,
269 index=index,
270 axis=axis,
271 method=method,
272 limit=limit,
273 limit_direction=limit_direction,
274 limit_area=limit_area,
275 fill_value=fill_value,
276 **kwargs,
277 )
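# Illustrative sketch (not part of pandas): the dispatcher above either pads/
# backfills or, for index-based methods such as "linear", hands off to
# _interpolate_2d_with_fill. Block values are stored one row per column, so
# the interpolation axis is 1 here. The local pandas import and the example
# function name are only for illustration.
def _interpolate_array_2d_example() -> np.ndarray:
    from pandas import RangeIndex
    data = np.array([[1.0, np.nan, 3.0]])  # one column of three rows
    interpolate_array_2d(data, method="linear", axis=1, index=RangeIndex(3))
    return data  # -> [[1.0, 2.0, 3.0]], filled in place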
280def _interpolate_2d_with_fill(
281 data: np.ndarray, # floating dtype
282 index: Index,
283 axis: AxisInt,
284 method: str = "linear",
285 limit: int | None = None,
286 limit_direction: str = "forward",
287 limit_area: str | None = None,
288 fill_value: Any | None = None,
289 **kwargs,
290) -> None:
291 """
292 Column-wise application of _interpolate_1d.
294 Notes
295 -----
296 Alters 'data' in-place.
298 The signature differs from _interpolate_1d because it only
299 includes what is needed for Block.interpolate.
300 """
301 # validate the interp method
302 clean_interp_method(method, index, **kwargs)
304 if is_valid_na_for_dtype(fill_value, data.dtype):
305 fill_value = na_value_for_dtype(data.dtype, compat=False)
307 if method == "time":
308 if not needs_i8_conversion(index.dtype):
309 raise ValueError(
310 "time-weighted interpolation only works "
311 "on Series or DataFrames with a "
312 "DatetimeIndex"
313 )
314 method = "values"
316 valid_limit_directions = ["forward", "backward", "both"]
317 limit_direction = limit_direction.lower()
318 if limit_direction not in valid_limit_directions:
319 raise ValueError(
320 "Invalid limit_direction: expecting one of "
321 f"{valid_limit_directions}, got '{limit_direction}'."
322 )
324 if limit_area is not None:
325 valid_limit_areas = ["inside", "outside"]
326 limit_area = limit_area.lower()
327 if limit_area not in valid_limit_areas:
328 raise ValueError(
329 f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
330 f"{limit_area}."
331 )
333 # default limit is unlimited GH #16282
334 limit = algos.validate_limit(nobs=None, limit=limit)
336 indices = _index_to_interp_indices(index, method)
338 def func(yvalues: np.ndarray) -> None:
339 # process 1-d slices in the axis direction
341 _interpolate_1d(
342 indices=indices,
343 yvalues=yvalues,
344 method=method,
345 limit=limit,
346 limit_direction=limit_direction,
347 limit_area=limit_area,
348 fill_value=fill_value,
349 bounds_error=False,
350 **kwargs,
351 )
353 # error: Argument 1 to "apply_along_axis" has incompatible type
354 # "Callable[[ndarray[Any, Any]], None]"; expected "Callable[...,
355 # Union[_SupportsArray[dtype[<nothing>]], Sequence[_SupportsArray
356 # [dtype[<nothing>]]], Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]],
357 # Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]],
358 # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]]]]]"
359 np.apply_along_axis(func, axis, data) # type: ignore[arg-type]
362def _index_to_interp_indices(index: Index, method: str) -> np.ndarray:
363 """
364 Convert Index to ndarray of indices to pass to NumPy/SciPy.
365 """
366 xarr = index._values
367 if needs_i8_conversion(xarr.dtype):
368 # GH#1646 for dt64tz
369 xarr = xarr.view("i8")
371 if method == "linear":
372 inds = xarr
373 inds = cast(np.ndarray, inds)
374 else:
375 inds = np.asarray(xarr)
377 if method in ("values", "index"):
378 if inds.dtype == np.object_:
379 inds = lib.maybe_convert_objects(inds)
381 return inds
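# Illustrative sketch (not part of pandas): datetime-like indexes are viewed
# as int64 nanoseconds before being used as x-coordinates. The local pandas
# import and the example function name are only for illustration.
def _index_to_interp_indices_example() -> np.ndarray:
    from pandas import date_range
    idx = date_range("2023-01-01", periods=4, freq="D")
    # Returns an int64 ndarray of the underlying timestamps, suitable for
    # np.interp or the SciPy wrappers.
    return _index_to_interp_indices(idx, method="values")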
384def _interpolate_1d(
385 indices: np.ndarray,
386 yvalues: np.ndarray,
387 method: str | None = "linear",
388 limit: int | None = None,
389 limit_direction: str = "forward",
390 limit_area: str | None = None,
391 fill_value: Any | None = None,
392 bounds_error: bool = False,
393 order: int | None = None,
394 **kwargs,
395) -> None:
396 """
397 Logic for the 1-d interpolation. The input
398 indices and yvalues will each be 1-d arrays of the same length.
400 bounds_error is currently hardcoded to False since the non-SciPy
401 interpolation methods do not take it as an argument.
403 Notes
404 -----
405 Fills 'yvalues' in-place.
406 """
408 invalid = isna(yvalues)
409 valid = ~invalid
411 if not valid.any():
412 return
414 if valid.all():
415 return
417 # These are sets of positional indices of invalid (NaN) values, e.g. {0, 1, ...}
418 all_nans = set(np.flatnonzero(invalid))
420 first_valid_index = find_valid_index(yvalues, how="first", is_valid=valid)
421 if first_valid_index is None: # defensive: no leading NaNs to preserve
422 first_valid_index = 0
423 start_nans = set(range(first_valid_index))
425 last_valid_index = find_valid_index(yvalues, how="last", is_valid=valid)
426 if last_valid_index is None: # defensive: no trailing NaNs to preserve
427 last_valid_index = len(yvalues)
428 end_nans = set(range(1 + last_valid_index, len(valid)))
430 # Like the sets above, preserve_nans contains indices of invalid values,
431 # but in this case, it is the final set of indices that need to be
432 # preserved as NaN after the interpolation.
434 # For example if limit_direction='forward' then preserve_nans will
435 # contain indices of NaNs at the beginning of the series, and NaNs that
436 # are more than 'limit' away from the prior non-NaN.
438 # set preserve_nans based on direction using _interp_limit
439 preserve_nans: list | set
440 if limit_direction == "forward":
441 preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
442 elif limit_direction == "backward":
443 preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
444 else:
445 # both directions... just use _interp_limit
446 preserve_nans = set(_interp_limit(invalid, limit, limit))
448 # if limit_area is set, add either mid or outside indices
449 # to preserve_nans GH #16284
450 if limit_area == "inside":
451 # preserve NaNs on the outside
452 preserve_nans |= start_nans | end_nans
453 elif limit_area == "outside":
454 # preserve NaNs on the inside
455 mid_nans = all_nans - start_nans - end_nans
456 preserve_nans |= mid_nans
458 # sort preserve_nans and convert to list
459 preserve_nans = sorted(preserve_nans)
461 is_datetimelike = needs_i8_conversion(yvalues.dtype)
463 if is_datetimelike:
464 yvalues = yvalues.view("i8")
466 if method in NP_METHODS:
467 # np.interp requires sorted X values, #21037
469 indexer = np.argsort(indices[valid])
470 yvalues[invalid] = np.interp(
471 indices[invalid], indices[valid][indexer], yvalues[valid][indexer]
472 )
473 else:
474 yvalues[invalid] = _interpolate_scipy_wrapper(
475 indices[valid],
476 yvalues[valid],
477 indices[invalid],
478 method=method,
479 fill_value=fill_value,
480 bounds_error=bounds_error,
481 order=order,
482 **kwargs,
483 )
485 if is_datetimelike:
486 yvalues[preserve_nans] = NaT.value
487 else:
488 yvalues[preserve_nans] = np.nan
489 return
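# Illustrative sketch (not part of pandas): _interpolate_1d fills 'yvalues' in
# place, and NaNs excluded by limit/limit_area stay NaN. The example function
# name is hypothetical.
def _interpolate_1d_example() -> np.ndarray:
    yvalues = np.array([np.nan, 1.0, np.nan, 3.0, np.nan])
    indices = np.arange(len(yvalues), dtype=np.float64)
    _interpolate_1d(
        indices=indices,
        yvalues=yvalues,
        method="linear",
        limit_direction="forward",
        limit_area="inside",
    )
    # Only the interior gap is filled; leading/trailing NaNs are preserved.
    return yvalues  # -> [nan, 1.0, 2.0, 3.0, nan]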
492def _interpolate_scipy_wrapper(
493 x,
494 y,
495 new_x,
496 method,
497 fill_value=None,
498 bounds_error: bool = False,
499 order=None,
500 **kwargs,
501):
502 """
503 Passed off to scipy.interpolate.interp1d. method is scipy's kind.
504 Returns an array interpolated at new_x. Add any new methods to
505 the list in clean_interp_method.
506 """
507 extra = f"{method} interpolation requires SciPy."
508 import_optional_dependency("scipy", extra=extra)
509 from scipy import interpolate
511 new_x = np.asarray(new_x)
513 # ignores some kwargs that could be passed along.
514 alt_methods = {
515 "barycentric": interpolate.barycentric_interpolate,
516 "krogh": interpolate.krogh_interpolate,
517 "from_derivatives": _from_derivatives,
518 "piecewise_polynomial": _from_derivatives,
519 }
521 if getattr(x, "_is_all_dates", False):
522 # GH 5975, scipy.interp1d can't handle datetime64s
523 x, new_x = x._values.astype("i8"), new_x.astype("i8")
525 if method == "pchip":
526 alt_methods["pchip"] = interpolate.pchip_interpolate
527 elif method == "akima":
528 alt_methods["akima"] = _akima_interpolate
529 elif method == "cubicspline":
530 alt_methods["cubicspline"] = _cubicspline_interpolate
532 interp1d_methods = [
533 "nearest",
534 "zero",
535 "slinear",
536 "quadratic",
537 "cubic",
538 "polynomial",
539 ]
540 if method in interp1d_methods:
541 if method == "polynomial":
542 method = order
543 terp = interpolate.interp1d(
544 x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error
545 )
546 new_y = terp(new_x)
547 elif method == "spline":
548 # GH #10633, #24014
549 if isna(order) or (order <= 0):
550 raise ValueError(
551 f"order needs to be specified and greater than 0; got order: {order}"
552 )
553 terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs)
554 new_y = terp(new_x)
555 else:
556 # GH 7295: need to be able to write for some reason
557 # in some circumstances: check all three
558 if not x.flags.writeable:
559 x = x.copy()
560 if not y.flags.writeable:
561 y = y.copy()
562 if not new_x.flags.writeable:
563 new_x = new_x.copy()
564 method = alt_methods[method]
565 new_y = method(x, y, new_x, **kwargs)
566 return new_y
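# Illustrative sketch (not part of pandas, assumes SciPy is installed): the
# wrapper returns y-values evaluated at new_x; callers normally pass
# fill_value=np.nan for out-of-bounds points. The example function name is
# hypothetical.
def _interpolate_scipy_wrapper_example() -> np.ndarray:
    x = np.array([0.0, 1.0, 2.0, 3.0])
    y = np.array([0.0, 1.0, 4.0, 9.0])  # samples of y = x ** 2
    new_x = np.array([0.5, 1.5, 2.5])
    # "quadratic" dispatches to scipy.interpolate.interp1d(kind="quadratic").
    return _interpolate_scipy_wrapper(
        x, y, new_x, method="quadratic", fill_value=np.nan
    )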
569def _from_derivatives(
570 xi, yi, x, order=None, der: int | list[int] | None = 0, extrapolate: bool = False
571):
572 """
573 Convenience function for interpolate.BPoly.from_derivatives.
575 Construct a piecewise polynomial in the Bernstein basis, compatible
576 with the specified values and derivatives at breakpoints.
578 Parameters
579 ----------
580 xi : array-like
581 sorted 1D array of x-coordinates
582 yi : array-like or list of array-likes
583 yi[i][j] is the j-th derivative known at xi[i]
584 order: None or int or array-like of ints. Default: None.
585 Specifies the degree of local polynomials. If not None, some
586 derivatives are ignored.
587 der : int or list
588 How many derivatives to extract; None for all potentially nonzero
589 derivatives (that is a number equal to the number of points), or a
590 list of derivatives to extract. This number includes the function
591 value as 0th derivative.
592 extrapolate : bool, optional
593 Whether to extrapolate to out-of-bounds points based on first and last
594 intervals, or to return NaNs. Default: False.
596 See Also
597 --------
598 scipy.interpolate.BPoly.from_derivatives
600 Returns
601 -------
602 y : scalar or array-like
603 The result, of length R or length M or M by R.
604 """
605 from scipy import interpolate
607 # return the method for compat with scipy version & backwards compat
608 method = interpolate.BPoly.from_derivatives
609 m = method(xi, yi.reshape(-1, 1), orders=order, extrapolate=extrapolate)
611 return m(x)
614def _akima_interpolate(xi, yi, x, der: int | list[int] | None = 0, axis: AxisInt = 0):
615 """
616 Convenience function for akima interpolation.
617 xi and yi are arrays of values used to approximate some function f,
618 with ``yi = f(xi)``.
620 See `Akima1DInterpolator` for details.
622 Parameters
623 ----------
624 xi : array-like
625 A sorted list of x-coordinates, of length N.
626 yi : array-like
627 A 1-D array of real values. `yi`'s length along the interpolation
628 axis must be equal to the length of `xi`. If N-D array, use axis
629 parameter to select correct axis.
630 x : scalar or array-like
631 Of length M.
632 der : int, optional
633 How many derivatives to extract; None for all potentially
634 nonzero derivatives (that is a number equal to the number
635 of points), or a list of derivatives to extract. This number
636 includes the function value as 0th derivative.
637 axis : int, optional
638 Axis in the yi array corresponding to the x-coordinate values.
640 See Also
641 --------
642 scipy.interpolate.Akima1DInterpolator
644 Returns
645 -------
646 y : scalar or array-like
647 The result, of length R or length M or M by R.
649 """
650 from scipy import interpolate
652 P = interpolate.Akima1DInterpolator(xi, yi, axis=axis)
654 return P(x, nu=der)
657def _cubicspline_interpolate(
658 xi,
659 yi,
660 x,
661 axis: AxisInt = 0,
662 bc_type: str | tuple[Any, Any] = "not-a-knot",
663 extrapolate=None,
664):
665 """
666 Convenience function for cubic spline data interpolator.
668 See `scipy.interpolate.CubicSpline` for details.
670 Parameters
671 ----------
672 xi : array-like, shape (n,)
673 1-d array containing values of the independent variable.
674 Values must be real, finite and in strictly increasing order.
675 yi : array-like
676 Array containing values of the dependent variable. It can have
677 arbitrary number of dimensions, but the length along ``axis``
678 (see below) must match the length of ``x``. Values must be finite.
679 x : scalar or array-like, shape (m,)
680 axis : int, optional
681 Axis along which `y` is assumed to be varying. Meaning that for
682 ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
683 Default is 0.
684 bc_type : string or 2-tuple, optional
685 Boundary condition type. Two additional equations, given by the
686 boundary conditions, are required to determine all coefficients of
687 polynomials on each segment [2]_.
688 If `bc_type` is a string, then the specified condition will be applied
689 at both ends of a spline. Available conditions are:
690 * 'not-a-knot' (default): The first and second segment at a curve end
691 are the same polynomial. It is a good default when there is no
692 information on boundary conditions.
693 * 'periodic': The interpolated function is assumed to be periodic
694 of period ``x[-1] - x[0]``. The first and last value of `y` must be
695 identical: ``y[0] == y[-1]``. This boundary condition will result in
696 ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``.
697 * 'clamped': The first derivative at the curve ends is zero. Assuming
698 a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition.
699 * 'natural': The second derivative at the curve ends is zero. Assuming
700 a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition.
701 If `bc_type` is a 2-tuple, the first and the second value will be
702 applied at the curve start and end respectively. The tuple values can
703 be one of the previously mentioned strings (except 'periodic') or a
704 tuple `(order, deriv_values)` which allows specifying arbitrary
705 derivatives at curve ends:
706 * `order`: the derivative order, 1 or 2.
707 * `deriv_value`: array-like containing derivative values, shape must
708 be the same as `y`, excluding ``axis`` dimension. For example, if
709 `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with
710 the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D
711 and have the shape (n0, n1).
712 extrapolate : {bool, 'periodic', None}, optional
713 If bool, determines whether to extrapolate to out-of-bounds points
714 based on first and last intervals, or to return NaNs. If 'periodic',
715 periodic extrapolation is used. If None (default), ``extrapolate`` is
716 set to 'periodic' for ``bc_type='periodic'`` and to True otherwise.
718 See Also
719 --------
720 scipy.interpolate.CubicHermiteSpline
722 Returns
723 -------
724 y : scalar or array-like
725 The result, of shape (m,)
727 References
728 ----------
729 .. [1] `Cubic Spline Interpolation
730 <https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_
731 on Wikiversity.
732 .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978.
733 """
734 from scipy import interpolate
736 P = interpolate.CubicSpline(
737 xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate
738 )
740 return P(x)
743def _interpolate_with_limit_area(
744 values: np.ndarray, method: str, limit: int | None, limit_area: str | None
745) -> None:
746 """
747 Apply interpolation and limit_area logic to values along a to-be-specified axis.
749 Parameters
750 ----------
751 values: np.ndarray
752 Input array.
753 method: str
754 Interpolation method. Can be "bfill" or "pad".
755 limit: int, optional
756 Index limit on interpolation.
757 limit_area: str
758 Limit area for interpolation. Can be "inside" or "outside"
760 Notes
761 -----
762 Modifies values in-place.
763 """
765 invalid = isna(values)
766 is_valid = ~invalid
768 if not invalid.all():
769 first = find_valid_index(values, how="first", is_valid=is_valid)
770 if first is None:
771 first = 0
772 last = find_valid_index(values, how="last", is_valid=is_valid)
773 if last is None:
774 last = len(values)
776 interpolate_2d(
777 values,
778 method=method,
779 limit=limit,
780 )
782 if limit_area == "inside":
783 invalid[first : last + 1] = False
784 elif limit_area == "outside":
785 invalid[:first] = invalid[last + 1 :] = False
787 values[invalid] = np.nan
790def interpolate_2d(
791 values: np.ndarray,
792 method: str = "pad",
793 axis: Axis = 0,
794 limit: int | None = None,
795 limit_area: str | None = None,
796) -> None:
797 """
798 Perform an actual interpolation of values; values will be made 2-d if
799 needed and filled in place.
801 Parameters
802 ----------
803 values: np.ndarray
804 Input array.
805 method: str, default "pad"
806 Interpolation method. Can be "bfill" or "pad".
807 axis: 0 or 1
808 Interpolation axis
809 limit: int, optional
810 Index limit on interpolation.
811 limit_area: str, optional
812 Limit area for interpolation. Can be "inside" or "outside"
814 Notes
815 -----
816 Modifies values in-place.
817 """
818 if limit_area is not None:
819 np.apply_along_axis(
820 # error: Argument 1 to "apply_along_axis" has incompatible type
821 # "partial[None]"; expected
822 # "Callable[..., Union[_SupportsArray[dtype[<nothing>]],
823 # Sequence[_SupportsArray[dtype[<nothing>]]],
824 # Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]],
825 # Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]],
826 # Sequence[Sequence[Sequence[Sequence[_
827 # SupportsArray[dtype[<nothing>]]]]]]]]"
828 partial( # type: ignore[arg-type]
829 _interpolate_with_limit_area,
830 method=method,
831 limit=limit,
832 limit_area=limit_area,
833 ),
834 # error: Argument 2 to "apply_along_axis" has incompatible type
835 # "Union[str, int]"; expected "SupportsIndex"
836 axis, # type: ignore[arg-type]
837 values,
838 )
839 return
841 transf = (lambda x: x) if axis == 0 else (lambda x: x.T)
843 # reshape a 1 dim if needed
844 if values.ndim == 1:
845 if axis != 0: # pragma: no cover
846 raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
847 values = values.reshape(tuple((1,) + values.shape))
849 method = clean_fill_method(method)
850 tvalues = transf(values)
852 # _pad_2d and _backfill_2d both modify tvalues inplace
853 if method == "pad":
854 _pad_2d(tvalues, limit=limit)
855 else:
856 _backfill_2d(tvalues, limit=limit)
858 return
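# Illustrative sketch (not part of pandas): interpolate_2d pads or backfills
# in place, honouring 'limit'. The example function name is hypothetical.
def _interpolate_2d_example() -> np.ndarray:
    values = np.array([[1.0, np.nan, np.nan, 4.0]])
    # Forward-fill at most one consecutive NaN per row.
    interpolate_2d(values, method="pad", limit=1)
    return values  # -> [[1.0, 1.0, nan, 4.0]]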
861def _fillna_prep(
862 values, mask: npt.NDArray[np.bool_] | None = None
863) -> npt.NDArray[np.bool_]:
864 # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d
866 if mask is None:
867 mask = isna(values)
869 mask = mask.view(np.uint8)
870 return mask
873def _datetimelike_compat(func: F) -> F:
874 """
875 Wrapper to handle datetime64 and timedelta64 dtypes.
876 """
878 @wraps(func)
879 def new_func(values, limit=None, mask=None):
880 if needs_i8_conversion(values.dtype):
881 if mask is None:
882 # This needs to occur before casting to int64
883 mask = isna(values)
885 result, mask = func(values.view("i8"), limit=limit, mask=mask)
886 return result.view(values.dtype), mask
888 return func(values, limit=limit, mask=mask)
890 return cast(F, new_func)
893@_datetimelike_compat
894def _pad_1d(
895 values: np.ndarray,
896 limit: int | None = None,
897 mask: npt.NDArray[np.bool_] | None = None,
898) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
899 mask = _fillna_prep(values, mask)
900 algos.pad_inplace(values, mask, limit=limit)
901 return values, mask
904@_datetimelike_compat
905def _backfill_1d(
906 values: np.ndarray,
907 limit: int | None = None,
908 mask: npt.NDArray[np.bool_] | None = None,
909) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
910 mask = _fillna_prep(values, mask)
911 algos.backfill_inplace(values, mask, limit=limit)
912 return values, mask
915@_datetimelike_compat
916def _pad_2d(values: np.ndarray, limit=None, mask: npt.NDArray[np.bool_] | None = None):
917 mask = _fillna_prep(values, mask)
919 if np.all(values.shape):
920 algos.pad_2d_inplace(values, mask, limit=limit)
921 else:
922 # for test coverage
923 pass
924 return values, mask
927@_datetimelike_compat
928def _backfill_2d(values, limit=None, mask: npt.NDArray[np.bool_] | None = None):
929 mask = _fillna_prep(values, mask)
931 if np.all(values.shape):
932 algos.backfill_2d_inplace(values, mask, limit=limit)
933 else:
934 # for test coverage
935 pass
936 return values, mask
939_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}
942def get_fill_func(method, ndim: int = 1):
943 method = clean_fill_method(method)
944 if ndim == 1:
945 return _fill_methods[method]
946 return {"pad": _pad_2d, "backfill": _backfill_2d}[method]
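# Illustrative sketch (not part of pandas): get_fill_func resolves a fill
# method to the 1-d or 2-d pad/backfill routine; each returns (values, mask).
# The example function name is hypothetical.
def _get_fill_func_example() -> np.ndarray:
    values = np.array([np.nan, 1.0, np.nan, np.nan])
    func = get_fill_func("ffill")  # resolves to _pad_1d
    filled, _ = func(values, limit=1)
    # At most one NaN per run is filled forward.
    return filled  # -> [nan, 1.0, 1.0, nan]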
949def clean_reindex_fill_method(method) -> str | None:
950 return clean_fill_method(method, allow_nearest=True)
953def _interp_limit(invalid: npt.NDArray[np.bool_], fw_limit, bw_limit):
954 """
955 Get indexers of values that won't be filled
956 because they exceed the limits.
958 Parameters
959 ----------
960 invalid : np.ndarray[bool]
961 fw_limit : int or None
962 forward limit to index
963 bw_limit : int or None
964 backward limit to index
966 Returns
967 -------
968 set of indexers
970 Notes
971 -----
972 This is equivalent to the more readable, but slower
974 .. code-block:: python
976 def _interp_limit(invalid, fw_limit, bw_limit):
977 for x in np.where(invalid)[0]:
978 if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
979 yield x
980 """
981 # handle forward first; the backward direction is the same except
982 # 1. operate on the reversed array
983 # 2. subtract the returned indices from N - 1
984 N = len(invalid)
985 f_idx = set()
986 b_idx = set()
988 def inner(invalid, limit):
989 limit = min(limit, N)
990 windowed = _rolling_window(invalid, limit + 1).all(1)
991 idx = set(np.where(windowed)[0] + limit) | set(
992 np.where((~invalid[: limit + 1]).cumsum() == 0)[0]
993 )
994 return idx
996 if fw_limit is not None:
997 if fw_limit == 0:
998 f_idx = set(np.where(invalid)[0])
999 else:
1000 f_idx = inner(invalid, fw_limit)
1002 if bw_limit is not None:
1003 if bw_limit == 0:
1004 # then we don't even need to care about backwards
1005 # just use forwards
1006 return f_idx
1007 else:
1008 b_idx_inv = list(inner(invalid[::-1], bw_limit))
1009 b_idx = set(N - 1 - np.asarray(b_idx_inv))
1010 if fw_limit == 0:
1011 return b_idx
1013 return f_idx & b_idx
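# Illustrative sketch (not part of pandas): the readable-but-slower reference
# from the docstring above, written out as a generator. The function name is
# hypothetical; for invalid = [F, T, T, T, F] with fw_limit=1 and bw_limit=1
# it yields only index 2, matching _interp_limit.
def _interp_limit_reference(
    invalid: npt.NDArray[np.bool_], fw_limit: int, bw_limit: int
):
    for x in np.where(invalid)[0]:
        if invalid[max(0, x - fw_limit) : x + bw_limit + 1].all():
            yield x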
1016def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.bool_]:
1017 """
1018 [True, True, False, True, False], 2 ->
1020 [
1021 [True, True],
1022 [True, False],
1023 [False, True],
1024 [True, False],
1025 ]
1026 """
1027 # https://stackoverflow.com/a/6811241
1028 shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
1029 strides = a.strides + (a.strides[-1],)
1030 return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
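# Illustrative sketch (not part of pandas): each row of the strided view is a
# length-'window' slice of the input, matching the docstring example above.
# The example function name is hypothetical.
def _rolling_window_example() -> npt.NDArray[np.bool_]:
    a = np.array([True, True, False, True, False])
    return _rolling_window(a, 2)
    # -> [[ True,  True],
    #     [ True, False],
    #     [False,  True],
    #     [ True, False]]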