1"""
2Routines for filling missing data.
3"""
4from __future__ import annotations
5
6from functools import wraps
7from typing import (
8 TYPE_CHECKING,
9 Any,
10 Literal,
11 cast,
12 overload,
13)
14
15import numpy as np
16
17from pandas._libs import (
18 NaT,
19 algos,
20 lib,
21)
22from pandas._typing import (
23 ArrayLike,
24 AxisInt,
25 F,
26 ReindexMethod,
27 npt,
28)
29from pandas.compat._optional import import_optional_dependency
30
31from pandas.core.dtypes.cast import infer_dtype_from
32from pandas.core.dtypes.common import (
33 is_array_like,
34 is_bool_dtype,
35 is_numeric_dtype,
36 is_numeric_v_string_like,
37 is_object_dtype,
38 needs_i8_conversion,
39)
40from pandas.core.dtypes.dtypes import DatetimeTZDtype
41from pandas.core.dtypes.missing import (
42 is_valid_na_for_dtype,
43 isna,
44 na_value_for_dtype,
45)
46
47if TYPE_CHECKING:
48 from pandas import Index
49
50
51def check_value_size(value, mask: npt.NDArray[np.bool_], length: int):
52 """
53 Validate the size of the values passed to ExtensionArray.fillna.
54 """
55 if is_array_like(value):
56 if len(value) != length:
57 raise ValueError(
58 f"Length of 'value' does not match. Got ({len(value)}) "
59 f" expected {length}"
60 )
61 value = value[mask]
62
63 return value
64
65
def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True

    Parameters
    ----------
    arr : ArrayLike
    values_to_mask: list, tuple, or scalar

    Returns
    -------
    np.ndarray[bool]
    """
    # When called from Block.replace/replace_list, values_to_mask is a scalar
    # known to be holdable by arr.
    # When called from Series._single_replace, values_to_mask is tuple or list
    dtype, values_to_mask = infer_dtype_from(values_to_mask)

    if isinstance(dtype, np.dtype):
        values_to_mask = np.array(values_to_mask, dtype=dtype)
    else:
        # extension dtype: materialize values_to_mask as the matching
        # ExtensionArray so the comparisons below use its semantics
        cls = dtype.construct_array_type()
        if not lib.is_list_like(values_to_mask):
            values_to_mask = [values_to_mask]
        values_to_mask = cls._from_sequence(values_to_mask, dtype=dtype, copy=False)

    potential_na = False
    if is_object_dtype(arr.dtype):
        # pre-compute mask to avoid comparison to NA
        potential_na = True
        arr_mask = ~isna(arr)

    # NA members of values_to_mask are handled separately at the end
    na_mask = isna(values_to_mask)
    nonna = values_to_mask[~na_mask]

    # GH 21977
    mask = np.zeros(arr.shape, dtype=bool)
    if (
        is_numeric_dtype(arr.dtype)
        and not is_bool_dtype(arr.dtype)
        and is_bool_dtype(nonna.dtype)
    ):
        # numeric arr vs bool values_to_mask: deliberately skip the
        # elementwise comparison (GH 21977); mask stays all-False
        pass
    elif (
        is_bool_dtype(arr.dtype)
        and is_numeric_dtype(nonna.dtype)
        and not is_bool_dtype(nonna.dtype)
    ):
        # bool arr vs numeric values_to_mask: same as above, skip
        pass
    else:
        for x in nonna:
            if is_numeric_v_string_like(arr, x):
                # GH#29553 prevent numpy deprecation warnings
                pass
            else:
                if potential_na:
                    # compare only the non-NA positions of an object arr
                    new_mask = np.zeros(arr.shape, dtype=np.bool_)
                    new_mask[arr_mask] = arr[arr_mask] == x
                else:
                    new_mask = arr == x

                if not isinstance(new_mask, np.ndarray):
                    # usually BooleanArray
                    new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
                mask |= new_mask

    if na_mask.any():
        # any NA in values_to_mask matches every NA position in arr
        mask |= isna(arr)

    return mask
137
138
# overload: without allow_nearest (default False), "nearest" is rejected and
# the result type narrows to "pad" | "backfill"
@overload
def clean_fill_method(
    method: Literal["ffill", "pad", "bfill", "backfill"],
    *,
    allow_nearest: Literal[False] = ...,
) -> Literal["pad", "backfill"]:
    ...
146
147
# overload: with allow_nearest=True, "nearest" is accepted and may be returned
@overload
def clean_fill_method(
    method: Literal["ffill", "pad", "bfill", "backfill", "nearest"],
    *,
    allow_nearest: Literal[True],
) -> Literal["pad", "backfill", "nearest"]:
    ...
155
156
def clean_fill_method(
    method: Literal["ffill", "pad", "bfill", "backfill", "nearest"],
    *,
    allow_nearest: bool = False,
) -> Literal["pad", "backfill", "nearest"]:
    """
    Normalize a fill-method name to its canonical form.

    "ffill" maps to "pad" and "bfill" to "backfill" (case-insensitively);
    "nearest" is accepted only when ``allow_nearest`` is True.  Raises
    ValueError for anything else.
    """
    if isinstance(method, str):
        # error: Incompatible types in assignment (expression has type "str", variable
        # has type "Literal['ffill', 'pad', 'bfill', 'backfill', 'nearest']")
        method = method.lower()  # type: ignore[assignment]
        aliases = {"ffill": "pad", "bfill": "backfill"}
        method = aliases.get(method, method)

    if allow_nearest:
        valid_methods = ["pad", "backfill", "nearest"]
        expecting = "pad (ffill), backfill (bfill) or nearest"
    else:
        valid_methods = ["pad", "backfill"]
        expecting = "pad (ffill) or backfill (bfill)"

    if method not in valid_methods:
        raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}")
    return method
179
180
# interpolation methods that dispatch to np.interp

NP_METHODS = ["linear", "time", "index", "values"]

# interpolation methods that dispatch to _interpolate_scipy_wrapper;
# "spline" and "polynomial" additionally require an "order" kwarg
# (enforced in clean_interp_method)

SP_METHODS = [
    "nearest",
    "zero",
    "slinear",
    "quadratic",
    "cubic",
    "barycentric",
    "krogh",
    "spline",
    "polynomial",
    "from_derivatives",
    "piecewise_polynomial",
    "pchip",
    "akima",
    "cubicspline",
]
203
204
def clean_interp_method(method: str, index: Index, **kwargs) -> str:
    """
    Validate an interpolation method (and its kwargs) and return it unchanged.

    Raises ValueError when the method is unknown, when "spline"/"polynomial"
    lack an ``order``, or when a monotonicity-requiring method is used with a
    non-monotonic index.
    """
    if method in ("spline", "polynomial") and kwargs.get("order") is None:
        raise ValueError("You must specify the order of the spline or polynomial.")

    valid = NP_METHODS + SP_METHODS
    if method not in valid:
        raise ValueError(f"method must be one of {valid}. Got '{method}' instead.")

    needs_monotonic = method in ("krogh", "piecewise_polynomial", "pchip")
    if needs_monotonic and not index.is_monotonic_increasing:
        raise ValueError(
            f"{method} interpolation requires that the index be monotonic."
        )

    return method
222
223
224def find_valid_index(how: str, is_valid: npt.NDArray[np.bool_]) -> int | None:
225 """
226 Retrieves the positional index of the first valid value.
227
228 Parameters
229 ----------
230 how : {'first', 'last'}
231 Use this parameter to change between the first or last valid index.
232 is_valid: np.ndarray
233 Mask to find na_values.
234
235 Returns
236 -------
237 int or None
238 """
239 assert how in ["first", "last"]
240
241 if len(is_valid) == 0: # early stop
242 return None
243
244 if is_valid.ndim == 2:
245 is_valid = is_valid.any(axis=1) # reduce axis 1
246
247 if how == "first":
248 idxpos = is_valid[::].argmax()
249
250 elif how == "last":
251 idxpos = len(is_valid) - 1 - is_valid[::-1].argmax()
252
253 chk_notna = is_valid[idxpos]
254
255 if not chk_notna:
256 return None
257 # Incompatible return value type (got "signedinteger[Any]",
258 # expected "Optional[int]")
259 return idxpos # type: ignore[return-value]
260
261
def validate_limit_direction(
    limit_direction: str,
) -> Literal["forward", "backward", "both"]:
    """
    Lower-case and validate a ``limit_direction`` string.

    Raises ValueError unless the result is one of
    "forward", "backward" or "both".
    """
    allowed = ["forward", "backward", "both"]
    lowered = limit_direction.lower()
    if lowered not in allowed:
        raise ValueError(
            "Invalid limit_direction: expecting one of "
            f"{allowed}, got '{lowered}'."
        )
    # error: Incompatible return value type (got "str", expected
    # "Literal['forward', 'backward', 'both']")
    return lowered  # type: ignore[return-value]
275
276
277def validate_limit_area(limit_area: str | None) -> Literal["inside", "outside"] | None:
278 if limit_area is not None:
279 valid_limit_areas = ["inside", "outside"]
280 limit_area = limit_area.lower()
281 if limit_area not in valid_limit_areas:
282 raise ValueError(
283 f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
284 f"{limit_area}."
285 )
286 # error: Incompatible return value type (got "Optional[str]", expected
287 # "Optional[Literal['inside', 'outside']]")
288 return limit_area # type: ignore[return-value]
289
290
291def infer_limit_direction(
292 limit_direction: Literal["backward", "forward", "both"] | None, method: str
293) -> Literal["backward", "forward", "both"]:
294 # Set `limit_direction` depending on `method`
295 if limit_direction is None:
296 if method in ("backfill", "bfill"):
297 limit_direction = "backward"
298 else:
299 limit_direction = "forward"
300 else:
301 if method in ("pad", "ffill") and limit_direction != "forward":
302 raise ValueError(
303 f"`limit_direction` must be 'forward' for method `{method}`"
304 )
305 if method in ("backfill", "bfill") and limit_direction != "backward":
306 raise ValueError(
307 f"`limit_direction` must be 'backward' for method `{method}`"
308 )
309 return limit_direction
310
311
def get_interp_index(method, index: Index) -> Index:
    """
    Return the x-values to interpolate against for the given method.

    "linear" ignores the actual index values and uses positions 0..n-1
    (the prior default); other methods use the index itself, after checking
    that its dtype is usable and NA-free.
    """
    if method == "linear":
        # prior default: interpolate over positions, not index values
        from pandas import Index

        index = Index(np.arange(len(index)))
    else:
        exempt = {"index", "values", "nearest", "time"}
        dtype_ok = (
            is_numeric_dtype(index.dtype)
            or isinstance(index.dtype, DatetimeTZDtype)
            or lib.is_np_dtype(index.dtype, "mM")
        )
        if method not in exempt and not dtype_ok:
            raise ValueError(
                "Index column must be numeric or datetime type when "
                f"using {method} method other than linear. "
                "Try setting a numeric or datetime index column before "
                "interpolating."
            )

    if isna(index).any():
        raise NotImplementedError(
            "Interpolation with NaNs in the index "
            "has not been implemented. Try filling "
            "those NaNs before interpolating."
        )
    return index
341
342
343def interpolate_2d_inplace(
344 data: np.ndarray, # floating dtype
345 index: Index,
346 axis: AxisInt,
347 method: str = "linear",
348 limit: int | None = None,
349 limit_direction: str = "forward",
350 limit_area: str | None = None,
351 fill_value: Any | None = None,
352 mask=None,
353 **kwargs,
354) -> None:
355 """
356 Column-wise application of _interpolate_1d.
357
358 Notes
359 -----
360 Alters 'data' in-place.
361
362 The signature does differ from _interpolate_1d because it only
363 includes what is needed for Block.interpolate.
364 """
365 # validate the interp method
366 clean_interp_method(method, index, **kwargs)
367
368 if is_valid_na_for_dtype(fill_value, data.dtype):
369 fill_value = na_value_for_dtype(data.dtype, compat=False)
370
371 if method == "time":
372 if not needs_i8_conversion(index.dtype):
373 raise ValueError(
374 "time-weighted interpolation only works "
375 "on Series or DataFrames with a "
376 "DatetimeIndex"
377 )
378 method = "values"
379
380 limit_direction = validate_limit_direction(limit_direction)
381 limit_area_validated = validate_limit_area(limit_area)
382
383 # default limit is unlimited GH #16282
384 limit = algos.validate_limit(nobs=None, limit=limit)
385
386 indices = _index_to_interp_indices(index, method)
387
388 def func(yvalues: np.ndarray) -> None:
389 # process 1-d slices in the axis direction
390
391 _interpolate_1d(
392 indices=indices,
393 yvalues=yvalues,
394 method=method,
395 limit=limit,
396 limit_direction=limit_direction,
397 limit_area=limit_area_validated,
398 fill_value=fill_value,
399 bounds_error=False,
400 mask=mask,
401 **kwargs,
402 )
403
404 # error: Argument 1 to "apply_along_axis" has incompatible type
405 # "Callable[[ndarray[Any, Any]], None]"; expected "Callable[...,
406 # Union[_SupportsArray[dtype[<nothing>]], Sequence[_SupportsArray
407 # [dtype[<nothing>]]], Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]],
408 # Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]],
409 # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]]]]]"
410 np.apply_along_axis(func, axis, data) # type: ignore[arg-type]
411
412
413def _index_to_interp_indices(index: Index, method: str) -> np.ndarray:
414 """
415 Convert Index to ndarray of indices to pass to NumPy/SciPy.
416 """
417 xarr = index._values
418 if needs_i8_conversion(xarr.dtype):
419 # GH#1646 for dt64tz
420 xarr = xarr.view("i8")
421
422 if method == "linear":
423 inds = xarr
424 inds = cast(np.ndarray, inds)
425 else:
426 inds = np.asarray(xarr)
427
428 if method in ("values", "index"):
429 if inds.dtype == np.object_:
430 inds = lib.maybe_convert_objects(inds)
431
432 return inds
433
434
def _interpolate_1d(
    indices: np.ndarray,
    yvalues: np.ndarray,
    method: str = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: Literal["inside", "outside"] | None = None,
    fill_value: Any | None = None,
    bounds_error: bool = False,
    order: int | None = None,
    mask=None,
    **kwargs,
) -> None:
    """
    Logic for the 1-d interpolation. The input
    indices and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Notes
    -----
    Fills 'yvalues' in-place.  When ``mask`` is given, it is rewritten
    in-place to mark the positions that must remain NA afterwards.
    """
    # treat a caller-supplied mask as the authoritative NA mask
    if mask is not None:
        invalid = mask
    else:
        invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # no anchor points to interpolate from
        return

    if valid.all():
        # nothing missing, nothing to do
        return

    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))

    first_valid_index = find_valid_index(how="first", is_valid=valid)
    if first_valid_index is None:  # defensive; valid.any() above guarantees a hit
        first_valid_index = 0
    start_nans = set(range(first_valid_index))

    last_valid_index = find_valid_index(how="last", is_valid=valid)
    if last_valid_index is None:  # defensive; valid.any() above guarantees a hit
        last_valid_index = len(yvalues)
    end_nans = set(range(1 + last_valid_index, len(valid)))

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    preserve_nans: list | set
    if limit_direction == "forward":
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == "backward":
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == "inside":
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == "outside":
        # preserve NaNs on the inside
        mid_nans = all_nans - start_nans - end_nans
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list
    preserve_nans = sorted(preserve_nans)

    is_datetimelike = yvalues.dtype.kind in "mM"

    if is_datetimelike:
        # interpolate on the i8 view; restored via NaT.value below
        yvalues = yvalues.view("i8")

    if method in NP_METHODS:
        # np.interp requires sorted X values, #21037

        indexer = np.argsort(indices[valid])
        yvalues[invalid] = np.interp(
            indices[invalid], indices[valid][indexer], yvalues[valid][indexer]
        )
    else:
        yvalues[invalid] = _interpolate_scipy_wrapper(
            indices[valid],
            yvalues[valid],
            indices[invalid],
            method=method,
            fill_value=fill_value,
            bounds_error=bounds_error,
            order=order,
            **kwargs,
        )

    # everything was filled above; now re-establish NA at the positions
    # the limit/limit_area rules excluded
    if mask is not None:
        mask[:] = False
        mask[preserve_nans] = True
    elif is_datetimelike:
        yvalues[preserve_nans] = NaT.value
    else:
        yvalues[preserve_nans] = np.nan
    return
547
548
549def _interpolate_scipy_wrapper(
550 x: np.ndarray,
551 y: np.ndarray,
552 new_x: np.ndarray,
553 method: str,
554 fill_value=None,
555 bounds_error: bool = False,
556 order=None,
557 **kwargs,
558):
559 """
560 Passed off to scipy.interpolate.interp1d. method is scipy's kind.
561 Returns an array interpolated at new_x. Add any new methods to
562 the list in _clean_interp_method.
563 """
564 extra = f"{method} interpolation requires SciPy."
565 import_optional_dependency("scipy", extra=extra)
566 from scipy import interpolate
567
568 new_x = np.asarray(new_x)
569
570 # ignores some kwargs that could be passed along.
571 alt_methods = {
572 "barycentric": interpolate.barycentric_interpolate,
573 "krogh": interpolate.krogh_interpolate,
574 "from_derivatives": _from_derivatives,
575 "piecewise_polynomial": _from_derivatives,
576 "cubicspline": _cubicspline_interpolate,
577 "akima": _akima_interpolate,
578 "pchip": interpolate.pchip_interpolate,
579 }
580
581 interp1d_methods = [
582 "nearest",
583 "zero",
584 "slinear",
585 "quadratic",
586 "cubic",
587 "polynomial",
588 ]
589 if method in interp1d_methods:
590 if method == "polynomial":
591 kind = order
592 else:
593 kind = method
594 terp = interpolate.interp1d(
595 x, y, kind=kind, fill_value=fill_value, bounds_error=bounds_error
596 )
597 new_y = terp(new_x)
598 elif method == "spline":
599 # GH #10633, #24014
600 if isna(order) or (order <= 0):
601 raise ValueError(
602 f"order needs to be specified and greater than 0; got order: {order}"
603 )
604 terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs)
605 new_y = terp(new_x)
606 else:
607 # GH 7295: need to be able to write for some reason
608 # in some circumstances: check all three
609 if not x.flags.writeable:
610 x = x.copy()
611 if not y.flags.writeable:
612 y = y.copy()
613 if not new_x.flags.writeable:
614 new_x = new_x.copy()
615 terp = alt_methods[method]
616 new_y = terp(x, y, new_x, **kwargs)
617 return new_y
618
619
620def _from_derivatives(
621 xi: np.ndarray,
622 yi: np.ndarray,
623 x: np.ndarray,
624 order=None,
625 der: int | list[int] | None = 0,
626 extrapolate: bool = False,
627):
628 """
629 Convenience function for interpolate.BPoly.from_derivatives.
630
631 Construct a piecewise polynomial in the Bernstein basis, compatible
632 with the specified values and derivatives at breakpoints.
633
634 Parameters
635 ----------
636 xi : array-like
637 sorted 1D array of x-coordinates
638 yi : array-like or list of array-likes
639 yi[i][j] is the j-th derivative known at xi[i]
640 order: None or int or array-like of ints. Default: None.
641 Specifies the degree of local polynomials. If not None, some
642 derivatives are ignored.
643 der : int or list
644 How many derivatives to extract; None for all potentially nonzero
645 derivatives (that is a number equal to the number of points), or a
646 list of derivatives to extract. This number includes the function
647 value as 0th derivative.
648 extrapolate : bool, optional
649 Whether to extrapolate to ouf-of-bounds points based on first and last
650 intervals, or to return NaNs. Default: True.
651
652 See Also
653 --------
654 scipy.interpolate.BPoly.from_derivatives
655
656 Returns
657 -------
658 y : scalar or array-like
659 The result, of length R or length M or M by R.
660 """
661 from scipy import interpolate
662
663 # return the method for compat with scipy version & backwards compat
664 method = interpolate.BPoly.from_derivatives
665 m = method(xi, yi.reshape(-1, 1), orders=order, extrapolate=extrapolate)
666
667 return m(x)
668
669
670def _akima_interpolate(
671 xi: np.ndarray,
672 yi: np.ndarray,
673 x: np.ndarray,
674 der: int | list[int] | None = 0,
675 axis: AxisInt = 0,
676):
677 """
678 Convenience function for akima interpolation.
679 xi and yi are arrays of values used to approximate some function f,
680 with ``yi = f(xi)``.
681
682 See `Akima1DInterpolator` for details.
683
684 Parameters
685 ----------
686 xi : np.ndarray
687 A sorted list of x-coordinates, of length N.
688 yi : np.ndarray
689 A 1-D array of real values. `yi`'s length along the interpolation
690 axis must be equal to the length of `xi`. If N-D array, use axis
691 parameter to select correct axis.
692 x : np.ndarray
693 Of length M.
694 der : int, optional
695 How many derivatives to extract; None for all potentially
696 nonzero derivatives (that is a number equal to the number
697 of points), or a list of derivatives to extract. This number
698 includes the function value as 0th derivative.
699 axis : int, optional
700 Axis in the yi array corresponding to the x-coordinate values.
701
702 See Also
703 --------
704 scipy.interpolate.Akima1DInterpolator
705
706 Returns
707 -------
708 y : scalar or array-like
709 The result, of length R or length M or M by R,
710
711 """
712 from scipy import interpolate
713
714 P = interpolate.Akima1DInterpolator(xi, yi, axis=axis)
715
716 return P(x, nu=der)
717
718
719def _cubicspline_interpolate(
720 xi: np.ndarray,
721 yi: np.ndarray,
722 x: np.ndarray,
723 axis: AxisInt = 0,
724 bc_type: str | tuple[Any, Any] = "not-a-knot",
725 extrapolate=None,
726):
727 """
728 Convenience function for cubic spline data interpolator.
729
730 See `scipy.interpolate.CubicSpline` for details.
731
732 Parameters
733 ----------
734 xi : np.ndarray, shape (n,)
735 1-d array containing values of the independent variable.
736 Values must be real, finite and in strictly increasing order.
737 yi : np.ndarray
738 Array containing values of the dependent variable. It can have
739 arbitrary number of dimensions, but the length along ``axis``
740 (see below) must match the length of ``x``. Values must be finite.
741 x : np.ndarray, shape (m,)
742 axis : int, optional
743 Axis along which `y` is assumed to be varying. Meaning that for
744 ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
745 Default is 0.
746 bc_type : string or 2-tuple, optional
747 Boundary condition type. Two additional equations, given by the
748 boundary conditions, are required to determine all coefficients of
749 polynomials on each segment [2]_.
750 If `bc_type` is a string, then the specified condition will be applied
751 at both ends of a spline. Available conditions are:
752 * 'not-a-knot' (default): The first and second segment at a curve end
753 are the same polynomial. It is a good default when there is no
754 information on boundary conditions.
755 * 'periodic': The interpolated functions is assumed to be periodic
756 of period ``x[-1] - x[0]``. The first and last value of `y` must be
757 identical: ``y[0] == y[-1]``. This boundary condition will result in
758 ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``.
759 * 'clamped': The first derivative at curves ends are zero. Assuming
760 a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition.
761 * 'natural': The second derivative at curve ends are zero. Assuming
762 a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition.
763 If `bc_type` is a 2-tuple, the first and the second value will be
764 applied at the curve start and end respectively. The tuple values can
765 be one of the previously mentioned strings (except 'periodic') or a
766 tuple `(order, deriv_values)` allowing to specify arbitrary
767 derivatives at curve ends:
768 * `order`: the derivative order, 1 or 2.
769 * `deriv_value`: array-like containing derivative values, shape must
770 be the same as `y`, excluding ``axis`` dimension. For example, if
771 `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with
772 the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D
773 and have the shape (n0, n1).
774 extrapolate : {bool, 'periodic', None}, optional
775 If bool, determines whether to extrapolate to out-of-bounds points
776 based on first and last intervals, or to return NaNs. If 'periodic',
777 periodic extrapolation is used. If None (default), ``extrapolate`` is
778 set to 'periodic' for ``bc_type='periodic'`` and to True otherwise.
779
780 See Also
781 --------
782 scipy.interpolate.CubicHermiteSpline
783
784 Returns
785 -------
786 y : scalar or array-like
787 The result, of shape (m,)
788
789 References
790 ----------
791 .. [1] `Cubic Spline Interpolation
792 <https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_
793 on Wikiversity.
794 .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978.
795 """
796 from scipy import interpolate
797
798 P = interpolate.CubicSpline(
799 xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate
800 )
801
802 return P(x)
803
804
def _interpolate_with_limit_area(
    values: np.ndarray,
    method: Literal["pad", "backfill"],
    limit: int | None,
    limit_area: Literal["inside", "outside"],
) -> None:
    """
    Apply interpolation and limit_area logic to values along a to-be-specified axis.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str
        Interpolation method. Could be "bfill" or "pad"
    limit: int, optional
        Index limit on interpolation.
    limit_area: {'inside', 'outside'}
        Limit area for interpolation.

    Notes
    -----
    Modifies values in-place.

    Strategy: fill via pad_or_backfill_inplace, then restore NaN at the
    pre-fill NA positions that ``limit_area`` disallows.
    """

    # NA mask of the ORIGINAL values; reused below to undo disallowed fills
    invalid = isna(values)
    is_valid = ~invalid

    if not invalid.all():
        first = find_valid_index(how="first", is_valid=is_valid)
        if first is None:
            first = 0
        last = find_valid_index(how="last", is_valid=is_valid)
        if last is None:
            last = len(values)

        pad_or_backfill_inplace(
            values,
            method=method,
            limit=limit,
            limit_area=limit_area,
        )

        if limit_area == "inside":
            # fills between the outermost valid values are kept; everything
            # still flagged in `invalid` reverts to NaN below
            invalid[first : last + 1] = False
        elif limit_area == "outside":
            # fills outside the outermost valid values are kept
            invalid[:first] = invalid[last + 1 :] = False
        else:
            raise ValueError("limit_area should be 'inside' or 'outside'")

        values[invalid] = np.nan
856
857
858def pad_or_backfill_inplace(
859 values: np.ndarray,
860 method: Literal["pad", "backfill"] = "pad",
861 axis: AxisInt = 0,
862 limit: int | None = None,
863 limit_area: Literal["inside", "outside"] | None = None,
864) -> None:
865 """
866 Perform an actual interpolation of values, values will be make 2-d if
867 needed fills inplace, returns the result.
868
869 Parameters
870 ----------
871 values: np.ndarray
872 Input array.
873 method: str, default "pad"
874 Interpolation method. Could be "bfill" or "pad"
875 axis: 0 or 1
876 Interpolation axis
877 limit: int, optional
878 Index limit on interpolation.
879 limit_area: str, optional
880 Limit area for interpolation. Can be "inside" or "outside"
881
882 Notes
883 -----
884 Modifies values in-place.
885 """
886 transf = (lambda x: x) if axis == 0 else (lambda x: x.T)
887
888 # reshape a 1 dim if needed
889 if values.ndim == 1:
890 if axis != 0: # pragma: no cover
891 raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
892 values = values.reshape(tuple((1,) + values.shape))
893
894 method = clean_fill_method(method)
895 tvalues = transf(values)
896
897 func = get_fill_func(method, ndim=2)
898 # _pad_2d and _backfill_2d both modify tvalues inplace
899 func(tvalues, limit=limit, limit_area=limit_area)
900
901
902def _fillna_prep(
903 values, mask: npt.NDArray[np.bool_] | None = None
904) -> npt.NDArray[np.bool_]:
905 # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d
906
907 if mask is None:
908 mask = isna(values)
909
910 return mask
911
912
913def _datetimelike_compat(func: F) -> F:
914 """
915 Wrapper to handle datetime64 and timedelta64 dtypes.
916 """
917
918 @wraps(func)
919 def new_func(
920 values,
921 limit: int | None = None,
922 limit_area: Literal["inside", "outside"] | None = None,
923 mask=None,
924 ):
925 if needs_i8_conversion(values.dtype):
926 if mask is None:
927 # This needs to occur before casting to int64
928 mask = isna(values)
929
930 result, mask = func(
931 values.view("i8"), limit=limit, limit_area=limit_area, mask=mask
932 )
933 return result.view(values.dtype), mask
934
935 return func(values, limit=limit, limit_area=limit_area, mask=mask)
936
937 return cast(F, new_func)
938
939
@_datetimelike_compat
def _pad_1d(
    values: np.ndarray,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    """
    Forward-fill NA entries of a 1-d array in place.

    Parameters
    ----------
    values : np.ndarray
        Array to fill; modified in place.
    limit : int, optional
        Maximum number of consecutive NA values to fill.
    limit_area : {"inside", "outside"}, optional
        Restrict filling relative to the outermost valid values.
    mask : np.ndarray[bool], optional
        Precomputed NA mask; computed from ``values`` when None.

    Returns
    -------
    tuple
        ``(values, mask)``.
    """
    mask = _fillna_prep(values, mask)
    if limit_area is not None and not mask.all():
        # remove positions from the mask that limit_area excludes from filling
        _fill_limit_area_1d(mask, limit_area)
    algos.pad_inplace(values, mask, limit=limit)
    return values, mask
952
953
@_datetimelike_compat
def _backfill_1d(
    values: np.ndarray,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    """
    Back-fill NA entries of a 1-d array in place.

    Parameters
    ----------
    values : np.ndarray
        Array to fill; modified in place.
    limit : int, optional
        Maximum number of consecutive NA values to fill.
    limit_area : {"inside", "outside"}, optional
        Restrict filling relative to the outermost valid values.
    mask : np.ndarray[bool], optional
        Precomputed NA mask; computed from ``values`` when None.

    Returns
    -------
    tuple
        ``(values, mask)``.
    """
    mask = _fillna_prep(values, mask)
    if limit_area is not None and not mask.all():
        # remove positions from the mask that limit_area excludes from filling
        _fill_limit_area_1d(mask, limit_area)
    algos.backfill_inplace(values, mask, limit=limit)
    return values, mask
966
967
@_datetimelike_compat
def _pad_2d(
    values: np.ndarray,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
):
    """
    Forward-fill NA entries of a 2-d array in place via algos.pad_2d_inplace.

    Returns ``(values, mask)``.  NOTE(review): unlike _pad_1d, there is no
    ``mask.all()`` short-circuit before the limit_area adjustment here.
    """
    mask = _fillna_prep(values, mask)
    if limit_area is not None:
        _fill_limit_area_2d(mask, limit_area)

    if values.size:
        algos.pad_2d_inplace(values, mask, limit=limit)
    else:
        # no-op on empty arrays; explicit branch kept for test coverage
        pass
    return values, mask
985
986
@_datetimelike_compat
def _backfill_2d(
    values,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
):
    """
    Back-fill NA entries of a 2-d array in place via algos.backfill_2d_inplace.

    Returns ``(values, mask)``.
    """
    mask = _fillna_prep(values, mask)
    if limit_area is not None:
        _fill_limit_area_2d(mask, limit_area)

    if values.size:
        algos.backfill_2d_inplace(values, mask, limit=limit)
    else:
        # no-op on empty arrays; explicit branch kept for test coverage
        pass
    return values, mask
1004
1005
1006def _fill_limit_area_1d(
1007 mask: npt.NDArray[np.bool_], limit_area: Literal["outside", "inside"]
1008) -> None:
1009 """Prepare 1d mask for ffill/bfill with limit_area.
1010
1011 Caller is responsible for checking at least one value of mask is False.
1012 When called, mask will no longer faithfully represent when
1013 the corresponding are NA or not.
1014
1015 Parameters
1016 ----------
1017 mask : np.ndarray[bool, ndim=1]
1018 Mask representing NA values when filling.
1019 limit_area : { "outside", "inside" }
1020 Whether to limit filling to outside or inside the outer most non-NA value.
1021 """
1022 neg_mask = ~mask
1023 first = neg_mask.argmax()
1024 last = len(neg_mask) - neg_mask[::-1].argmax() - 1
1025 if limit_area == "inside":
1026 mask[:first] = False
1027 mask[last + 1 :] = False
1028 elif limit_area == "outside":
1029 mask[first + 1 : last] = False
1030
1031
1032def _fill_limit_area_2d(
1033 mask: npt.NDArray[np.bool_], limit_area: Literal["outside", "inside"]
1034) -> None:
1035 """Prepare 2d mask for ffill/bfill with limit_area.
1036
1037 When called, mask will no longer faithfully represent when
1038 the corresponding are NA or not.
1039
1040 Parameters
1041 ----------
1042 mask : np.ndarray[bool, ndim=1]
1043 Mask representing NA values when filling.
1044 limit_area : { "outside", "inside" }
1045 Whether to limit filling to outside or inside the outer most non-NA value.
1046 """
1047 neg_mask = ~mask.T
1048 if limit_area == "outside":
1049 # Identify inside
1050 la_mask = (
1051 np.maximum.accumulate(neg_mask, axis=0)
1052 & np.maximum.accumulate(neg_mask[::-1], axis=0)[::-1]
1053 )
1054 else:
1055 # Identify outside
1056 la_mask = (
1057 ~np.maximum.accumulate(neg_mask, axis=0)
1058 | ~np.maximum.accumulate(neg_mask[::-1], axis=0)[::-1]
1059 )
1060 mask[la_mask.T] = False
1061
1062
1063_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}
1064
1065
1066def get_fill_func(method, ndim: int = 1):
1067 method = clean_fill_method(method)
1068 if ndim == 1:
1069 return _fill_methods[method]
1070 return {"pad": _pad_2d, "backfill": _backfill_2d}[method]
1071
1072
1073def clean_reindex_fill_method(method) -> ReindexMethod | None:
1074 if method is None:
1075 return None
1076 return clean_fill_method(method, allow_nearest=True)
1077
1078
def _interp_limit(
    invalid: npt.NDArray[np.bool_], fw_limit: int | None, bw_limit: int | None
):
    """
    Get indexers of values that won't be filled
    because they exceed the limits.

    Parameters
    ----------
    invalid : np.ndarray[bool]
    fw_limit : int or None
        forward limit to index
    bw_limit : int or None
        backward limit to index

    Returns
    -------
    set of indexers

    Notes
    -----
    This is equivalent to the more readable, but slower

    .. code-block:: python

        def _interp_limit(invalid, fw_limit, bw_limit):
            for x in np.where(invalid)[0]:
                if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                    yield x
    """
    # handle forward first; the backward direction is the same except
    # 1. operate on the reversed array
    # 2. subtract the returned indices from N - 1
    N = len(invalid)
    f_idx = set()
    b_idx = set()

    def inner(invalid, limit: int):
        # indices that are more than `limit` past the last valid value:
        # a NaN at position i is beyond the limit when the window of
        # limit+1 entries ending at i is all-NaN; the second term catches
        # leading NaNs (no prior valid value at all)
        limit = min(limit, N)
        windowed = _rolling_window(invalid, limit + 1).all(1)
        idx = set(np.where(windowed)[0] + limit) | set(
            np.where((~invalid[: limit + 1]).cumsum() == 0)[0]
        )
        return idx

    if fw_limit is not None:
        if fw_limit == 0:
            # limit of 0 forbids forward filling entirely: every NaN exceeds it
            f_idx = set(np.where(invalid)[0])
        else:
            f_idx = inner(invalid, fw_limit)

    if bw_limit is not None:
        if bw_limit == 0:
            # then we don't even need to care about backwards
            # just use forwards
            return f_idx
        else:
            # reverse, reuse the forward logic, then map indices back
            b_idx_inv = list(inner(invalid[::-1], bw_limit))
            b_idx = set(N - 1 - np.asarray(b_idx_inv))
            if fw_limit == 0:
                return b_idx

    # a value stays NaN only when it exceeds BOTH directional limits
    return f_idx & b_idx
1142
1143
1144def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.bool_]:
1145 """
1146 [True, True, False, True, False], 2 ->
1147
1148 [
1149 [True, True],
1150 [True, False],
1151 [False, True],
1152 [True, False],
1153 ]
1154 """
1155 # https://stackoverflow.com/a/6811241
1156 shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
1157 strides = a.strides + (a.strides[-1],)
1158 return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)