1"""
2Routines for filling missing data.
3"""
4from __future__ import annotations
5
6from functools import (
7 partial,
8 wraps,
9)
10from typing import (
11 TYPE_CHECKING,
12 Any,
13 cast,
14)
15
16import numpy as np
17
18from pandas._libs import (
19 NaT,
20 algos,
21 lib,
22)
23from pandas._typing import (
24 ArrayLike,
25 Axis,
26 AxisInt,
27 F,
28 npt,
29)
30from pandas.compat._optional import import_optional_dependency
31
32from pandas.core.dtypes.cast import infer_dtype_from
33from pandas.core.dtypes.common import (
34 is_array_like,
35 is_numeric_v_string_like,
36 is_object_dtype,
37 needs_i8_conversion,
38)
39from pandas.core.dtypes.missing import (
40 is_valid_na_for_dtype,
41 isna,
42 na_value_for_dtype,
43)
44
45if TYPE_CHECKING:
46 from pandas import Index
47
48
def check_value_size(value, mask: npt.NDArray[np.bool_], length: int):
    """
    Validate the size of the values passed to ExtensionArray.fillna.

    Parameters
    ----------
    value : scalar or array-like
        Candidate fill value. Only objects recognised by ``is_array_like``
        are length-checked and masked; anything else is returned unchanged.
    mask : np.ndarray[bool]
        Boolean indexer applied to an array-like ``value``.
    length : int
        Length an array-like ``value`` is required to have.

    Returns
    -------
    scalar or array-like
        ``value[mask]`` when ``value`` is array-like, otherwise ``value``.

    Raises
    ------
    ValueError
        If ``value`` is array-like and ``len(value) != length``.
    """
    if is_array_like(value):
        n_values = len(value)
        if n_values != length:
            # Exactly one space between the two fragments; the second
            # fragment must not start with a blank of its own.
            raise ValueError(
                f"Length of 'value' does not match. Got ({n_values}) "
                f"expected {length}"
            )
        value = value[mask]

    return value
62
63
def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
    """
    Build a boolean mask, shaped like ``arr``, that is True wherever an
    entry of ``arr`` equals one of ``values_to_mask``.

    NA entries in ``values_to_mask`` match the NA entries of ``arr``.

    Parameters
    ----------
    arr : ArrayLike
    values_to_mask: list, tuple, or scalar

    Returns
    -------
    np.ndarray[bool]
    """
    # Block.replace/replace_list call this with a scalar that arr is known
    # to be able to hold; Series._single_replace passes a tuple or list.
    dtype, values_to_mask = infer_dtype_from(values_to_mask)
    # mypy: "dtype" may be an ExtensionDtype, which np.array's stubs do not
    # accept, hence the ignore below.
    values_to_mask = np.array(values_to_mask, dtype=dtype)  # type: ignore[arg-type]

    object_arr = is_object_dtype(arr)
    if object_arr:
        # compute the non-NA positions once so that the equality checks
        # below never compare against NA
        not_na = ~isna(arr)

    targets_na = isna(values_to_mask)
    targets = values_to_mask[~targets_na]

    # GH 21977
    mask = np.zeros(arr.shape, dtype=bool)
    for target in targets:
        if is_numeric_v_string_like(arr, target):
            # GH#29553 prevent numpy deprecation warnings
            continue

        if object_arr:
            hits = np.zeros(arr.shape, dtype=np.bool_)
            hits[not_na] = arr[not_na] == target
        else:
            hits = arr == target
            if not isinstance(hits, np.ndarray):
                # usually BooleanArray
                hits = hits.to_numpy(dtype=bool, na_value=False)

        mask |= hits

    if targets_na.any():
        mask |= isna(arr)

    return mask
119
120
def clean_fill_method(method: str | None, allow_nearest: bool = False):
    """
    Normalise a fill-method name to its canonical spelling.

    Parameters
    ----------
    method : str or None
        Matched case-insensitively; "ffill" is an alias for "pad" and
        "bfill" an alias for "backfill".
    allow_nearest : bool, default False
        Whether "nearest" is accepted in addition to pad/backfill.

    Returns
    -------
    str or None
        "pad", "backfill" or (if allowed) "nearest"; None when ``method``
        is None or "asfreq".

    Raises
    ------
    ValueError
        If ``method`` is not one of the accepted names.
    """
    # "asfreq" is accepted for compatibility with resampling
    if method in (None, "asfreq"):
        return None

    if isinstance(method, str):
        lowered = method.lower()
        method = {"ffill": "pad", "bfill": "backfill"}.get(lowered, lowered)

    if allow_nearest:
        accepted = ("pad", "backfill", "nearest")
        expecting = "pad (ffill), backfill (bfill) or nearest"
    else:
        accepted = ("pad", "backfill")
        expecting = "pad (ffill) or backfill (bfill)"

    if method not in accepted:
        raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}")
    return method
141
142
# Interpolation methods that dispatch to np.interp
# (the ``method in NP_METHODS`` branch of _interpolate_1d).

NP_METHODS = ["linear", "time", "index", "values"]

# Interpolation methods that dispatch to _interpolate_scipy_wrapper,
# which imports SciPy as an optional dependency.

SP_METHODS = [
    "nearest",
    "zero",
    "slinear",
    "quadratic",
    "cubic",
    "barycentric",
    "krogh",
    "spline",
    "polynomial",
    "from_derivatives",
    "piecewise_polynomial",
    "pchip",
    "akima",
    "cubicspline",
]
165
166
def clean_interp_method(method: str, index: Index, **kwargs) -> str:
    """
    Validate an interpolation method name and return it unchanged.

    Parameters
    ----------
    method : str
        Must be listed in NP_METHODS or SP_METHODS.
    index : Index
        Only inspected (for monotonicity) by the "krogh",
        "piecewise_polynomial" and "pchip" methods.
    **kwargs
        Only ``order`` is read; it is mandatory for "spline" and
        "polynomial".

    Returns
    -------
    str

    Raises
    ------
    ValueError
        If ``order`` is missing where required, if ``method`` is unknown,
        or if ``index`` is not monotonic increasing for a method that
        needs it.
    """
    if kwargs.get("order") is None and method in ("spline", "polynomial"):
        raise ValueError("You must specify the order of the spline or polynomial.")

    valid = NP_METHODS + SP_METHODS
    if method not in valid:
        raise ValueError(f"method must be one of {valid}. Got '{method}' instead.")

    needs_monotonic = method in ("krogh", "piecewise_polynomial", "pchip")
    if needs_monotonic and not index.is_monotonic_increasing:
        raise ValueError(
            f"{method} interpolation requires that the index be monotonic."
        )

    return method
184
185
def find_valid_index(
    values, *, how: str, is_valid: npt.NDArray[np.bool_]
) -> int | None:
    """
    Locate the position of the first or last valid value.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        Only its length and ``ndim`` are consulted.
    how : {'first', 'last'}
        Whether to look for the first or the last valid position.
    is_valid: np.ndarray
        Boolean mask, True where ``values`` holds a valid (non-NA) entry.
        For 2-dimensional ``values`` a row counts as valid if any of its
        entries is.

    Returns
    -------
    int or None
        None if ``values`` is empty or nothing is valid.
    """
    assert how in ["first", "last"]

    if len(values) == 0:  # early stop
        return None

    if values.ndim == 2:
        # collapse the columns: one flag per row
        is_valid = is_valid.any(axis=1)

    # argmax on a boolean array gives the position of the first True
    # (or 0 if there is none, which is caught further down)
    if how == "first":
        pos = is_valid.argmax()
    elif how == "last":
        pos = len(values) - 1 - is_valid[::-1].argmax()

    if not is_valid[pos]:
        return None
    # mypy: "signedinteger[Any]" is not "Optional[int]"
    return pos  # type: ignore[return-value]
225
226
def interpolate_array_2d(
    data: np.ndarray,
    method: str = "pad",
    axis: AxisInt = 0,
    index: Index | None = None,
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    coerce: bool = False,
    downcast: str | None = None,
    **kwargs,
) -> None:
    """
    Dispatch to interpolate_2d or to _interpolate_2d_with_fill.

    ``method`` is first passed to clean_fill_method. If that yields a fill
    method, interpolate_2d is used; if it yields None or rejects the name,
    ``method`` is forwarded to _interpolate_2d_with_fill instead.

    Raises
    ------
    ValueError
        If a fill method is combined with a non-None ``fill_value``.

    Notes
    -----
    Alters 'data' in-place.

    ``coerce`` and ``downcast`` are accepted but not used in this function.
    """
    try:
        fill_method = clean_fill_method(method)
    except ValueError:
        # not a fill method name: leave validation to the interpolation path
        fill_method = None

    if fill_method is None:
        assert index is not None  # for mypy

        _interpolate_2d_with_fill(
            data=data,
            index=index,
            axis=axis,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            **kwargs,
        )
        return

    if fill_value is not None:
        # similar to validate_fillna_kwargs
        raise ValueError("Cannot pass both fill_value and method")

    interpolate_2d(
        data,
        method=fill_method,
        axis=axis,
        limit=limit,
        limit_area=limit_area,
    )
278
279
def _interpolate_2d_with_fill(
    data: np.ndarray,  # floating dtype
    index: Index,
    axis: AxisInt,
    method: str = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    **kwargs,
) -> None:
    """
    Apply _interpolate_1d to every 1-d slice of ``data`` along ``axis``.

    Raises
    ------
    ValueError
        If ``method`` fails clean_interp_method, if ``method`` is "time"
        and the index dtype does not need i8 conversion, or if
        ``limit_direction`` / ``limit_area`` are not recognised.

    Notes
    -----
    Alters 'data' in-place.

    The signature is narrower than that of _interpolate_1d because it only
    carries what Block.interpolate needs.
    """
    # validate the interp method (return value is not needed)
    clean_interp_method(method, index, **kwargs)

    if is_valid_na_for_dtype(fill_value, data.dtype):
        # use the NA value na_value_for_dtype reports for data's dtype
        fill_value = na_value_for_dtype(data.dtype, compat=False)

    if method == "time":
        if not needs_i8_conversion(index.dtype):
            raise ValueError(
                "time-weighted interpolation only works "
                "on Series or DataFrames with a "
                "DatetimeIndex"
            )
        # from here on "time" is handled exactly like "values"
        method = "values"

    limit_direction = limit_direction.lower()
    valid_limit_directions = ["forward", "backward", "both"]
    if limit_direction not in valid_limit_directions:
        raise ValueError(
            "Invalid limit_direction: expecting one of "
            f"{valid_limit_directions}, got '{limit_direction}'."
        )

    if limit_area is not None:
        limit_area = limit_area.lower()
        valid_limit_areas = ["inside", "outside"]
        if limit_area not in valid_limit_areas:
            raise ValueError(
                f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
                f"{limit_area}."
            )

    # default limit is unlimited GH #16282
    limit = algos.validate_limit(nobs=None, limit=limit)

    x_positions = _index_to_interp_indices(index, method)

    def _fill_slice(yvalues: np.ndarray) -> None:
        # handles one 1-d slice taken along ``axis``; fills it in place
        _interpolate_1d(
            indices=x_positions,
            yvalues=yvalues,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            bounds_error=False,
            **kwargs,
        )

    # mypy: apply_along_axis expects the callable to return an array-like,
    # whereas _fill_slice returns None and works purely by side effect.
    np.apply_along_axis(_fill_slice, axis, data)  # type: ignore[arg-type]
360
361
362def _index_to_interp_indices(index: Index, method: str) -> np.ndarray:
363 """
364 Convert Index to ndarray of indices to pass to NumPy/SciPy.
365 """
366 xarr = index._values
367 if needs_i8_conversion(xarr.dtype):
368 # GH#1646 for dt64tz
369 xarr = xarr.view("i8")
370
371 if method == "linear":
372 inds = xarr
373 inds = cast(np.ndarray, inds)
374 else:
375 inds = np.asarray(xarr)
376
377 if method in ("values", "index"):
378 if inds.dtype == np.object_:
379 inds = lib.maybe_convert_objects(inds)
380
381 return inds
382
383
def _interpolate_1d(
    indices: np.ndarray,
    yvalues: np.ndarray,
    method: str | None = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    bounds_error: bool = False,
    order: int | None = None,
    **kwargs,
) -> None:
    """
    Interpolate the missing entries of a single 1-d array.

    ``indices`` (the x-coordinates) and ``yvalues`` are 1-d arrays of equal
    length. Methods listed in NP_METHODS go through ``np.interp``; every
    other method is handed to _interpolate_scipy_wrapper.

    bounds_error is currently hardcoded to False by the caller since the
    non-scipy methods don't take it as an argument.

    Notes
    -----
    Fills 'yvalues' in-place. Nothing happens when 'yvalues' is entirely
    NA or has no NA at all.
    """
    nan_mask = isna(yvalues)
    has_value = ~nan_mask

    # nothing to interpolate from, or nothing to interpolate
    if not has_value.any() or has_value.all():
        return

    # The sets below hold positions of missing values, e.g. {0, 1, ...}
    nan_positions = set(np.flatnonzero(nan_mask))

    first_valid = find_valid_index(yvalues, how="first", is_valid=has_value)
    if first_valid is None:  # no nan found in start
        first_valid = 0
    leading_nans = set(range(first_valid))

    last_valid = find_valid_index(yvalues, how="last", is_valid=has_value)
    if last_valid is None:  # no nan found in end
        last_valid = len(yvalues)
    trailing_nans = set(range(1 + last_valid, len(has_value)))

    # preserve_nans collects the positions that must be NaN again once the
    # interpolation has run. With limit_direction='forward', for instance,
    # that is every NaN before the first valid value plus every NaN lying
    # more than 'limit' positions after the preceding valid value.
    preserve_nans: list | set
    if limit_direction == "forward":
        preserve_nans = leading_nans | set(_interp_limit(nan_mask, limit, 0))
    elif limit_direction == "backward":
        preserve_nans = trailing_nans | set(_interp_limit(nan_mask, 0, limit))
    else:
        # both directions... _interp_limit alone decides
        preserve_nans = set(_interp_limit(nan_mask, limit, limit))

    # limit_area widens the preserved set, GH #16284
    if limit_area == "inside":
        # keep the NaNs outside the first/last valid values
        preserve_nans |= leading_nans | trailing_nans
    elif limit_area == "outside":
        # keep the NaNs between the first and last valid values
        preserve_nans |= nan_positions - leading_nans - trailing_nans

    # a sorted list is what is used as the indexer at the end
    preserve_nans = sorted(preserve_nans)

    is_datetimelike = needs_i8_conversion(yvalues.dtype)
    if is_datetimelike:
        # the i8 view shares memory, so the writes below still reach the
        # caller's array
        yvalues = yvalues.view("i8")

    if method in NP_METHODS:
        # np.interp requires sorted X values, #21037
        known_x = indices[has_value]
        sorter = np.argsort(known_x)
        yvalues[nan_mask] = np.interp(
            indices[nan_mask], known_x[sorter], yvalues[has_value][sorter]
        )
    else:
        yvalues[nan_mask] = _interpolate_scipy_wrapper(
            indices[has_value],
            yvalues[has_value],
            indices[nan_mask],
            method=method,
            fill_value=fill_value,
            bounds_error=bounds_error,
            order=order,
            **kwargs,
        )

    # everything was filled above; now blank out what must stay missing
    missing_marker = NaT.value if is_datetimelike else np.nan
    yvalues[preserve_nans] = missing_marker
    return
490
491
def _interpolate_scipy_wrapper(
    x,
    y,
    new_x,
    method,
    fill_value=None,
    bounds_error: bool = False,
    order=None,
    **kwargs,
):
    """
    Interpolate ``y`` (known at ``x``) at the points ``new_x`` using SciPy.

    "nearest", "zero", "slinear", "quadratic", "cubic" and "polynomial" go
    to scipy.interpolate.interp1d, with ``method`` (or ``order`` for
    "polynomial") used as scipy's ``kind``. "spline" uses
    UnivariateSpline with ``k=order``. Every other method is looked up in
    a table of dedicated interpolation functions.

    Returns an array interpolated at new_x. Any new method must also be
    added to the lists checked by clean_interp_method.

    Raises
    ------
    ValueError
        For "spline" when ``order`` is NA or not greater than 0.
    """
    extra = f"{method} interpolation requires SciPy."
    import_optional_dependency("scipy", extra=extra)
    from scipy import interpolate

    new_x = np.asarray(new_x)

    # Dispatch table for methods that are neither interp1d kinds nor
    # "spline"; some kwargs that could be passed along are ignored here.
    alt_methods = {
        "barycentric": interpolate.barycentric_interpolate,
        "krogh": interpolate.krogh_interpolate,
        "from_derivatives": _from_derivatives,
        "piecewise_polynomial": _from_derivatives,
        "pchip": interpolate.pchip_interpolate,
        "akima": _akima_interpolate,
        "cubicspline": _cubicspline_interpolate,
    }

    if getattr(x, "_is_all_dates", False):
        # GH 5975, scipy.interp1d can't handle datetime64s
        x, new_x = x._values.astype("i8"), new_x.astype("i8")

    interp1d_methods = (
        "nearest",
        "zero",
        "slinear",
        "quadratic",
        "cubic",
        "polynomial",
    )
    if method in interp1d_methods:
        # for "polynomial" the order itself is interp1d's ``kind``
        kind = order if method == "polynomial" else method
        interpolator = interpolate.interp1d(
            x, y, kind=kind, fill_value=fill_value, bounds_error=bounds_error
        )
        return interpolator(new_x)

    if method == "spline":
        # GH #10633, #24014
        if isna(order) or (order <= 0):
            raise ValueError(
                f"order needs to be specified and greater than 0; got order: {order}"
            )
        spline = interpolate.UnivariateSpline(x, y, k=order, **kwargs)
        return spline(new_x)

    # GH 7295: the inputs need to be writeable in some circumstances,
    # so check all three
    if not x.flags.writeable:
        x = x.copy()
    if not y.flags.writeable:
        y = y.copy()
    if not new_x.flags.writeable:
        new_x = new_x.copy()
    interp_func = alt_methods[method]
    return interp_func(x, y, new_x, **kwargs)
567
568
569def _from_derivatives(
570 xi, yi, x, order=None, der: int | list[int] | None = 0, extrapolate: bool = False
571):
572 """
573 Convenience function for interpolate.BPoly.from_derivatives.
574
575 Construct a piecewise polynomial in the Bernstein basis, compatible
576 with the specified values and derivatives at breakpoints.
577
578 Parameters
579 ----------
580 xi : array-like
581 sorted 1D array of x-coordinates
582 yi : array-like or list of array-likes
583 yi[i][j] is the j-th derivative known at xi[i]
584 order: None or int or array-like of ints. Default: None.
585 Specifies the degree of local polynomials. If not None, some
586 derivatives are ignored.
587 der : int or list
588 How many derivatives to extract; None for all potentially nonzero
589 derivatives (that is a number equal to the number of points), or a
590 list of derivatives to extract. This number includes the function
591 value as 0th derivative.
592 extrapolate : bool, optional
593 Whether to extrapolate to ouf-of-bounds points based on first and last
594 intervals, or to return NaNs. Default: True.
595
596 See Also
597 --------
598 scipy.interpolate.BPoly.from_derivatives
599
600 Returns
601 -------
602 y : scalar or array-like
603 The result, of length R or length M or M by R.
604 """
605 from scipy import interpolate
606
607 # return the method for compat with scipy version & backwards compat
608 method = interpolate.BPoly.from_derivatives
609 m = method(xi, yi.reshape(-1, 1), orders=order, extrapolate=extrapolate)
610
611 return m(x)
612
613
614def _akima_interpolate(xi, yi, x, der: int | list[int] | None = 0, axis: AxisInt = 0):
615 """
616 Convenience function for akima interpolation.
617 xi and yi are arrays of values used to approximate some function f,
618 with ``yi = f(xi)``.
619
620 See `Akima1DInterpolator` for details.
621
622 Parameters
623 ----------
624 xi : array-like
625 A sorted list of x-coordinates, of length N.
626 yi : array-like
627 A 1-D array of real values. `yi`'s length along the interpolation
628 axis must be equal to the length of `xi`. If N-D array, use axis
629 parameter to select correct axis.
630 x : scalar or array-like
631 Of length M.
632 der : int, optional
633 How many derivatives to extract; None for all potentially
634 nonzero derivatives (that is a number equal to the number
635 of points), or a list of derivatives to extract. This number
636 includes the function value as 0th derivative.
637 axis : int, optional
638 Axis in the yi array corresponding to the x-coordinate values.
639
640 See Also
641 --------
642 scipy.interpolate.Akima1DInterpolator
643
644 Returns
645 -------
646 y : scalar or array-like
647 The result, of length R or length M or M by R,
648
649 """
650 from scipy import interpolate
651
652 P = interpolate.Akima1DInterpolator(xi, yi, axis=axis)
653
654 return P(x, nu=der)
655
656
657def _cubicspline_interpolate(
658 xi,
659 yi,
660 x,
661 axis: AxisInt = 0,
662 bc_type: str | tuple[Any, Any] = "not-a-knot",
663 extrapolate=None,
664):
665 """
666 Convenience function for cubic spline data interpolator.
667
668 See `scipy.interpolate.CubicSpline` for details.
669
670 Parameters
671 ----------
672 xi : array-like, shape (n,)
673 1-d array containing values of the independent variable.
674 Values must be real, finite and in strictly increasing order.
675 yi : array-like
676 Array containing values of the dependent variable. It can have
677 arbitrary number of dimensions, but the length along ``axis``
678 (see below) must match the length of ``x``. Values must be finite.
679 x : scalar or array-like, shape (m,)
680 axis : int, optional
681 Axis along which `y` is assumed to be varying. Meaning that for
682 ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
683 Default is 0.
684 bc_type : string or 2-tuple, optional
685 Boundary condition type. Two additional equations, given by the
686 boundary conditions, are required to determine all coefficients of
687 polynomials on each segment [2]_.
688 If `bc_type` is a string, then the specified condition will be applied
689 at both ends of a spline. Available conditions are:
690 * 'not-a-knot' (default): The first and second segment at a curve end
691 are the same polynomial. It is a good default when there is no
692 information on boundary conditions.
693 * 'periodic': The interpolated functions is assumed to be periodic
694 of period ``x[-1] - x[0]``. The first and last value of `y` must be
695 identical: ``y[0] == y[-1]``. This boundary condition will result in
696 ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``.
697 * 'clamped': The first derivative at curves ends are zero. Assuming
698 a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition.
699 * 'natural': The second derivative at curve ends are zero. Assuming
700 a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition.
701 If `bc_type` is a 2-tuple, the first and the second value will be
702 applied at the curve start and end respectively. The tuple values can
703 be one of the previously mentioned strings (except 'periodic') or a
704 tuple `(order, deriv_values)` allowing to specify arbitrary
705 derivatives at curve ends:
706 * `order`: the derivative order, 1 or 2.
707 * `deriv_value`: array-like containing derivative values, shape must
708 be the same as `y`, excluding ``axis`` dimension. For example, if
709 `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with
710 the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D
711 and have the shape (n0, n1).
712 extrapolate : {bool, 'periodic', None}, optional
713 If bool, determines whether to extrapolate to out-of-bounds points
714 based on first and last intervals, or to return NaNs. If 'periodic',
715 periodic extrapolation is used. If None (default), ``extrapolate`` is
716 set to 'periodic' for ``bc_type='periodic'`` and to True otherwise.
717
718 See Also
719 --------
720 scipy.interpolate.CubicHermiteSpline
721
722 Returns
723 -------
724 y : scalar or array-like
725 The result, of shape (m,)
726
727 References
728 ----------
729 .. [1] `Cubic Spline Interpolation
730 <https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_
731 on Wikiversity.
732 .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978.
733 """
734 from scipy import interpolate
735
736 P = interpolate.CubicSpline(
737 xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate
738 )
739
740 return P(x)
741
742
def _interpolate_with_limit_area(
    values: np.ndarray, method: str, limit: int | None, limit_area: str | None
) -> None:
    """
    Fill ``values`` with interpolate_2d, then restore the missing entries
    that ``limit_area`` excludes from filling.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str
        Interpolation method. Could be "bfill" or "pad"
    limit: int, optional
        Index limit on interpolation.
    limit_area: str
        Limit area for interpolation. Can be "inside" or "outside"

    Notes
    -----
    Modifies values in-place. Nothing happens if every entry is missing.
    """
    to_restore = isna(values)
    if to_restore.all():
        return

    present = ~to_restore

    first = find_valid_index(values, how="first", is_valid=present)
    if first is None:
        first = 0
    last = find_valid_index(values, how="last", is_valid=present)
    if last is None:
        last = len(values)

    interpolate_2d(
        values,
        method=method,
        limit=limit,
    )

    # Un-flag the region where filling is allowed; whatever remains
    # flagged is set back to NaN below.
    if limit_area == "inside":
        to_restore[first : last + 1] = False
    elif limit_area == "outside":
        to_restore[:first] = False
        to_restore[last + 1 :] = False

    values[to_restore] = np.nan
788
789
def interpolate_2d(
    values: np.ndarray,
    method: str = "pad",
    axis: Axis = 0,
    limit: int | None = None,
    limit_area: str | None = None,
) -> None:
    """
    Pad or backfill ``values`` in place; 1-d input is treated as a single
    row of a 2-d array.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str, default "pad"
        Interpolation method. Could be "bfill" or "pad"
    axis: 0 or 1
        Interpolation axis
    limit: int, optional
        Index limit on interpolation.
    limit_area: str, optional
        Limit area for interpolation. Can be "inside" or "outside"

    Notes
    -----
    Modifies values in-place; nothing is returned.
    """
    if limit_area is not None:
        # limit_area is handled slice by slice along ``axis``
        per_slice = partial(
            _interpolate_with_limit_area,
            method=method,
            limit=limit,
            limit_area=limit_area,
        )
        # mypy: apply_along_axis wants a callable returning an array-like
        # and a SupportsIndex axis; the partial returns None and ``axis``
        # is typed "Union[str, int]".
        np.apply_along_axis(
            per_slice,  # type: ignore[arg-type]
            axis,  # type: ignore[arg-type]
            values,
        )
        return

    # decided up front, before any reshaping below
    transpose = axis != 0

    # reshape a 1 dim if needed
    if values.ndim == 1:
        if axis != 0:  # pragma: no cover
            raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
        values = values.reshape((1, *values.shape))

    method = clean_fill_method(method)
    tvalues = values.T if transpose else values

    # _pad_2d and _backfill_2d both modify tvalues inplace
    filler = _pad_2d if method == "pad" else _backfill_2d
    filler(tvalues, limit=limit)

    return
859
860
861def _fillna_prep(
862 values, mask: npt.NDArray[np.bool_] | None = None
863) -> npt.NDArray[np.bool_]:
864 # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d
865
866 if mask is None:
867 mask = isna(values)
868
869 mask = mask.view(np.uint8)
870 return mask
871
872
873def _datetimelike_compat(func: F) -> F:
874 """
875 Wrapper to handle datetime64 and timedelta64 dtypes.
876 """
877
878 @wraps(func)
879 def new_func(values, limit=None, mask=None):
880 if needs_i8_conversion(values.dtype):
881 if mask is None:
882 # This needs to occur before casting to int64
883 mask = isna(values)
884
885 result, mask = func(values.view("i8"), limit=limit, mask=mask)
886 return result.view(values.dtype), mask
887
888 return func(values, limit=limit, mask=mask)
889
890 return cast(F, new_func)
891
892
@_datetimelike_compat
def _pad_1d(
    values: np.ndarray,
    limit: int | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    """
    Pad (forward-fill) 1-d ``values`` in place via ``algos.pad_inplace``.

    Returns ``values`` together with the uint8 mask produced by
    _fillna_prep.
    """
    prepared = _fillna_prep(values, mask)
    algos.pad_inplace(values, prepared, limit=limit)
    return values, prepared
902
903
@_datetimelike_compat
def _backfill_1d(
    values: np.ndarray,
    limit: int | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    """
    Backfill 1-d ``values`` in place via ``algos.backfill_inplace``.

    Returns ``values`` together with the uint8 mask produced by
    _fillna_prep.
    """
    prepared = _fillna_prep(values, mask)
    algos.backfill_inplace(values, prepared, limit=limit)
    return values, prepared
913
914
@_datetimelike_compat
def _pad_2d(values: np.ndarray, limit=None, mask: npt.NDArray[np.bool_] | None = None):
    """
    Pad (forward-fill) 2-d ``values`` in place via ``algos.pad_2d_inplace``.

    Arrays with a zero-length dimension are left untouched. Returns
    ``values`` together with the uint8 mask produced by _fillna_prep.
    """
    prepared = _fillna_prep(values, mask)

    # skip the call when any dimension has length 0
    if np.all(values.shape):
        algos.pad_2d_inplace(values, prepared, limit=limit)

    return values, prepared
925
926
@_datetimelike_compat
def _backfill_2d(values, limit=None, mask: npt.NDArray[np.bool_] | None = None):
    """
    Backfill 2-d ``values`` in place via ``algos.backfill_2d_inplace``.

    Arrays with a zero-length dimension are left untouched. Returns
    ``values`` together with the uint8 mask produced by _fillna_prep.
    """
    prepared = _fillna_prep(values, mask)

    # skip the call when any dimension has length 0
    if np.all(values.shape):
        algos.backfill_2d_inplace(values, prepared, limit=limit)

    return values, prepared
937
938
# 1-d fill functions keyed by canonical method name
_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}


def get_fill_func(method, ndim: int = 1):
    """
    Return the fill function for ``method``.

    ``method`` is normalised with clean_fill_method first. The 1-d
    implementation is returned when ``ndim == 1``, the 2-d implementation
    for any other ``ndim``.
    """
    method = clean_fill_method(method)
    if ndim == 1:
        table = _fill_methods
    else:
        table = {"pad": _pad_2d, "backfill": _backfill_2d}
    return table[method]
947
948
def clean_reindex_fill_method(method) -> str | None:
    """
    Normalise a fill method with clean_fill_method, additionally accepting
    "nearest".
    """
    cleaned = clean_fill_method(method, allow_nearest=True)
    return cleaned
951
952
def _interp_limit(invalid: npt.NDArray[np.bool_], fw_limit, bw_limit):
    """
    Get indexers of values that won't be filled
    because they exceed the limits.

    Parameters
    ----------
    invalid : np.ndarray[bool]
    fw_limit : int or None
        forward limit to index
    bw_limit : int or None
        backward limit to index

    Returns
    -------
    set of indexers

    Notes
    -----
    This is equivalent to the more readable, but slower

    .. code-block:: python

        def _interp_limit(invalid, fw_limit, bw_limit):
            for x in np.where(invalid)[0]:
                if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                    yield x
    """
    # The forward direction is computed directly. The backward direction
    # reuses the same helper by
    # 1. operating on the reversed array
    # 2. subtracting the returned indices from n_total - 1
    n_total = len(invalid)

    def _beyond_limit(flags, limit):
        limit = min(limit, n_total)
        # positions that close a run of limit + 1 consecutive invalid flags
        full_windows = _rolling_window(flags, limit + 1).all(1)
        run_ends = set(np.where(full_windows)[0] + limit)
        # invalid positions among the first limit + 1 that come before
        # any valid entry
        leading = set(np.where((~flags[: limit + 1]).cumsum() == 0)[0])
        return run_ends | leading

    forward = set()
    if fw_limit is not None:
        if fw_limit == 0:
            forward = set(np.where(invalid)[0])
        else:
            forward = _beyond_limit(invalid, fw_limit)

    if bw_limit is None:
        # no backward set to intersect with: the result is empty
        return set()

    if bw_limit == 0:
        # then we don't even need to care about backwards
        # just use forwards
        return forward

    reversed_hits = list(_beyond_limit(invalid[::-1], bw_limit))
    backward = set(n_total - 1 - np.asarray(reversed_hits))
    if fw_limit == 0:
        return backward

    return forward & backward
1014
1015
1016def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.bool_]:
1017 """
1018 [True, True, False, True, False], 2 ->
1019
1020 [
1021 [True, True],
1022 [True, False],
1023 [False, True],
1024 [True, False],
1025 ]
1026 """
1027 # https://stackoverflow.com/a/6811241
1028 shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
1029 strides = a.strides + (a.strides[-1],)
1030 return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)