1"""
2Routines for filling missing data.
3"""
4from __future__ import annotations
5
6from functools import wraps
7from typing import (
8 TYPE_CHECKING,
9 Any,
10 Literal,
11 cast,
12 overload,
13)
14
15import numpy as np
16
17from pandas._libs import (
18 NaT,
19 algos,
20 lib,
21)
22from pandas._typing import (
23 ArrayLike,
24 AxisInt,
25 F,
26 ReindexMethod,
27 npt,
28)
29from pandas.compat._optional import import_optional_dependency
30
31from pandas.core.dtypes.cast import infer_dtype_from
32from pandas.core.dtypes.common import (
33 is_array_like,
34 is_bool_dtype,
35 is_numeric_dtype,
36 is_numeric_v_string_like,
37 is_object_dtype,
38 needs_i8_conversion,
39)
40from pandas.core.dtypes.dtypes import DatetimeTZDtype
41from pandas.core.dtypes.missing import (
42 is_valid_na_for_dtype,
43 isna,
44 na_value_for_dtype,
45)
46
47if TYPE_CHECKING:
48 from pandas import Index
49
50
51def check_value_size(value, mask: npt.NDArray[np.bool_], length: int):
52 """
53 Validate the size of the values passed to ExtensionArray.fillna.
54 """
55 if is_array_like(value):
56 if len(value) != length:
57 raise ValueError(
58 f"Length of 'value' does not match. Got ({len(value)}) "
59 f" expected {length}"
60 )
61 value = value[mask]
62
63 return value
64
65
def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True

    Parameters
    ----------
    arr : ArrayLike
    values_to_mask: list, tuple, or scalar

    Returns
    -------
    np.ndarray[bool]
    """
    # When called from Block.replace/replace_list, values_to_mask is a scalar
    # known to be holdable by arr.
    # When called from Series._single_replace, values_to_mask is tuple or list
    dtype, values_to_mask = infer_dtype_from(values_to_mask)

    if isinstance(dtype, np.dtype):
        values_to_mask = np.array(values_to_mask, dtype=dtype)
    else:
        # extension dtype: materialize values_to_mask as the matching
        # ExtensionArray so the comparisons below use its semantics
        cls = dtype.construct_array_type()
        if not lib.is_list_like(values_to_mask):
            values_to_mask = [values_to_mask]
        values_to_mask = cls._from_sequence(values_to_mask, dtype=dtype, copy=False)

    potential_na = False
    if is_object_dtype(arr.dtype):
        # pre-compute mask to avoid comparison to NA
        potential_na = True
        arr_mask = ~isna(arr)

    # NA members of values_to_mask are handled separately at the end
    na_mask = isna(values_to_mask)
    nonna = values_to_mask[~na_mask]

    # GH 21977
    mask = np.zeros(arr.shape, dtype=bool)
    if (
        is_numeric_dtype(arr.dtype)
        and not is_bool_dtype(arr.dtype)
        and is_bool_dtype(nonna.dtype)
    ):
        # numeric arr vs bool values_to_mask: deliberately skip the
        # elementwise comparison (GH 21977); mask stays all-False
        pass
    elif (
        is_bool_dtype(arr.dtype)
        and is_numeric_dtype(nonna.dtype)
        and not is_bool_dtype(nonna.dtype)
    ):
        # bool arr vs numeric values_to_mask: same as above, skip
        pass
    else:
        for x in nonna:
            if is_numeric_v_string_like(arr, x):
                # GH#29553 prevent numpy deprecation warnings
                pass
            else:
                if potential_na:
                    # compare only the non-NA positions of an object arr
                    new_mask = np.zeros(arr.shape, dtype=np.bool_)
                    new_mask[arr_mask] = arr[arr_mask] == x
                else:
                    new_mask = arr == x

                if not isinstance(new_mask, np.ndarray):
                    # usually BooleanArray
                    new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
                mask |= new_mask

    if na_mask.any():
        # any NA in values_to_mask matches every NA position in arr
        mask |= isna(arr)

    return mask
137
138
# overload: without allow_nearest (default False), "nearest" is rejected and
# the result type narrows to "pad" | "backfill"
@overload
def clean_fill_method(
    method: Literal["ffill", "pad", "bfill", "backfill"],
    *,
    allow_nearest: Literal[False] = ...,
) -> Literal["pad", "backfill"]:
    ...
146
147
# overload: with allow_nearest=True, "nearest" is accepted and may be returned
@overload
def clean_fill_method(
    method: Literal["ffill", "pad", "bfill", "backfill", "nearest"],
    *,
    allow_nearest: Literal[True],
) -> Literal["pad", "backfill", "nearest"]:
    ...
155
156
def clean_fill_method(
    method: Literal["ffill", "pad", "bfill", "backfill", "nearest"],
    *,
    allow_nearest: bool = False,
) -> Literal["pad", "backfill", "nearest"]:
    """
    Normalize a fill-method name to its canonical form.

    "ffill" maps to "pad" and "bfill" to "backfill" (case-insensitively);
    "nearest" is accepted only when ``allow_nearest`` is True.  Raises
    ValueError for anything else.
    """
    if isinstance(method, str):
        # error: Incompatible types in assignment (expression has type "str", variable
        # has type "Literal['ffill', 'pad', 'bfill', 'backfill', 'nearest']")
        method = method.lower()  # type: ignore[assignment]
        aliases = {"ffill": "pad", "bfill": "backfill"}
        method = aliases.get(method, method)

    if allow_nearest:
        valid_methods = ["pad", "backfill", "nearest"]
        expecting = "pad (ffill), backfill (bfill) or nearest"
    else:
        valid_methods = ["pad", "backfill"]
        expecting = "pad (ffill) or backfill (bfill)"

    if method not in valid_methods:
        raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}")
    return method
179
180
# interpolation methods that dispatch to np.interp

NP_METHODS = ["linear", "time", "index", "values"]

# interpolation methods that dispatch to _interpolate_scipy_wrapper;
# "spline" and "polynomial" additionally require an "order" kwarg
# (enforced in clean_interp_method)

SP_METHODS = [
    "nearest",
    "zero",
    "slinear",
    "quadratic",
    "cubic",
    "barycentric",
    "krogh",
    "spline",
    "polynomial",
    "from_derivatives",
    "piecewise_polynomial",
    "pchip",
    "akima",
    "cubicspline",
]
203
204
def clean_interp_method(method: str, index: Index, **kwargs) -> str:
    """
    Validate an interpolation method (and its kwargs) and return it unchanged.

    Raises ValueError when the method is unknown, when "spline"/"polynomial"
    lack an ``order``, or when a monotonicity-requiring method is used with a
    non-monotonic index.
    """
    if method in ("spline", "polynomial") and kwargs.get("order") is None:
        raise ValueError("You must specify the order of the spline or polynomial.")

    valid = NP_METHODS + SP_METHODS
    if method not in valid:
        raise ValueError(f"method must be one of {valid}. Got '{method}' instead.")

    needs_monotonic = method in ("krogh", "piecewise_polynomial", "pchip")
    if needs_monotonic and not index.is_monotonic_increasing:
        raise ValueError(
            f"{method} interpolation requires that the index be monotonic."
        )

    return method
222
223
224def find_valid_index(how: str, is_valid: npt.NDArray[np.bool_]) -> int | None:
225 """
226 Retrieves the positional index of the first valid value.
227
228 Parameters
229 ----------
230 how : {'first', 'last'}
231 Use this parameter to change between the first or last valid index.
232 is_valid: np.ndarray
233 Mask to find na_values.
234
235 Returns
236 -------
237 int or None
238 """
239 assert how in ["first", "last"]
240
241 if len(is_valid) == 0: # early stop
242 return None
243
244 if is_valid.ndim == 2:
245 is_valid = is_valid.any(axis=1) # reduce axis 1
246
247 if how == "first":
248 idxpos = is_valid[::].argmax()
249
250 elif how == "last":
251 idxpos = len(is_valid) - 1 - is_valid[::-1].argmax()
252
253 chk_notna = is_valid[idxpos]
254
255 if not chk_notna:
256 return None
257 # Incompatible return value type (got "signedinteger[Any]",
258 # expected "Optional[int]")
259 return idxpos # type: ignore[return-value]
260
261
def validate_limit_direction(
    limit_direction: str,
) -> Literal["forward", "backward", "both"]:
    """
    Lower-case and validate a ``limit_direction`` string.

    Raises ValueError unless the result is one of
    "forward", "backward" or "both".
    """
    allowed = ["forward", "backward", "both"]
    lowered = limit_direction.lower()
    if lowered not in allowed:
        raise ValueError(
            "Invalid limit_direction: expecting one of "
            f"{allowed}, got '{lowered}'."
        )
    # error: Incompatible return value type (got "str", expected
    # "Literal['forward', 'backward', 'both']")
    return lowered  # type: ignore[return-value]
275
276
277def validate_limit_area(limit_area: str | None) -> Literal["inside", "outside"] | None:
278 if limit_area is not None:
279 valid_limit_areas = ["inside", "outside"]
280 limit_area = limit_area.lower()
281 if limit_area not in valid_limit_areas:
282 raise ValueError(
283 f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
284 f"{limit_area}."
285 )
286 # error: Incompatible return value type (got "Optional[str]", expected
287 # "Optional[Literal['inside', 'outside']]")
288 return limit_area # type: ignore[return-value]
289
290
291def infer_limit_direction(
292 limit_direction: Literal["backward", "forward", "both"] | None, method: str
293) -> Literal["backward", "forward", "both"]:
294 # Set `limit_direction` depending on `method`
295 if limit_direction is None:
296 if method in ("backfill", "bfill"):
297 limit_direction = "backward"
298 else:
299 limit_direction = "forward"
300 else:
301 if method in ("pad", "ffill") and limit_direction != "forward":
302 raise ValueError(
303 f"`limit_direction` must be 'forward' for method `{method}`"
304 )
305 if method in ("backfill", "bfill") and limit_direction != "backward":
306 raise ValueError(
307 f"`limit_direction` must be 'backward' for method `{method}`"
308 )
309 return limit_direction
310
311
def get_interp_index(method, index: Index) -> Index:
    """
    Return the x-values to interpolate against for the given method.

    "linear" ignores the actual index values and uses positions 0..n-1
    (the prior default); other methods use the index itself, after checking
    that its dtype is usable and NA-free.
    """
    if method == "linear":
        # prior default: interpolate over positions, not index values
        from pandas import Index

        index = Index(np.arange(len(index)))
    else:
        exempt = {"index", "values", "nearest", "time"}
        dtype_ok = (
            is_numeric_dtype(index.dtype)
            or isinstance(index.dtype, DatetimeTZDtype)
            or lib.is_np_dtype(index.dtype, "mM")
        )
        if method not in exempt and not dtype_ok:
            raise ValueError(
                "Index column must be numeric or datetime type when "
                f"using {method} method other than linear. "
                "Try setting a numeric or datetime index column before "
                "interpolating."
            )

    if isna(index).any():
        raise NotImplementedError(
            "Interpolation with NaNs in the index "
            "has not been implemented. Try filling "
            "those NaNs before interpolating."
        )
    return index
341
342
343def interpolate_2d_inplace(
344 data: np.ndarray, # floating dtype
345 index: Index,
346 axis: AxisInt,
347 method: str = "linear",
348 limit: int | None = None,
349 limit_direction: str = "forward",
350 limit_area: str | None = None,
351 fill_value: Any | None = None,
352 mask=None,
353 **kwargs,
354) -> None:
355 """
356 Column-wise application of _interpolate_1d.
357
358 Notes
359 -----
360 Alters 'data' in-place.
361
362 The signature does differ from _interpolate_1d because it only
363 includes what is needed for Block.interpolate.
364 """
365 # validate the interp method
366 clean_interp_method(method, index, **kwargs)
367
368 if is_valid_na_for_dtype(fill_value, data.dtype):
369 fill_value = na_value_for_dtype(data.dtype, compat=False)
370
371 if method == "time":
372 if not needs_i8_conversion(index.dtype):
373 raise ValueError(
374 "time-weighted interpolation only works "
375 "on Series or DataFrames with a "
376 "DatetimeIndex"
377 )
378 method = "values"
379
380 limit_direction = validate_limit_direction(limit_direction)
381 limit_area_validated = validate_limit_area(limit_area)
382
383 # default limit is unlimited GH #16282
384 limit = algos.validate_limit(nobs=None, limit=limit)
385
386 indices = _index_to_interp_indices(index, method)
387
388 def func(yvalues: np.ndarray) -> None:
389 # process 1-d slices in the axis direction
390
391 _interpolate_1d(
392 indices=indices,
393 yvalues=yvalues,
394 method=method,
395 limit=limit,
396 limit_direction=limit_direction,
397 limit_area=limit_area_validated,
398 fill_value=fill_value,
399 bounds_error=False,
400 mask=mask,
401 **kwargs,
402 )
403
404 # error: Argument 1 to "apply_along_axis" has incompatible type
405 # "Callable[[ndarray[Any, Any]], None]"; expected "Callable[...,
406 # Union[_SupportsArray[dtype[<nothing>]], Sequence[_SupportsArray
407 # [dtype[<nothing>]]], Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]],
408 # Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]],
409 # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]]]]]"
410 np.apply_along_axis(func, axis, data) # type: ignore[arg-type]
411
412
413def _index_to_interp_indices(index: Index, method: str) -> np.ndarray:
414 """
415 Convert Index to ndarray of indices to pass to NumPy/SciPy.
416 """
417 xarr = index._values
418 if needs_i8_conversion(xarr.dtype):
419 # GH#1646 for dt64tz
420 xarr = xarr.view("i8")
421
422 if method == "linear":
423 inds = xarr
424 inds = cast(np.ndarray, inds)
425 else:
426 inds = np.asarray(xarr)
427
428 if method in ("values", "index"):
429 if inds.dtype == np.object_:
430 inds = lib.maybe_convert_objects(inds)
431
432 return inds
433
434
def _interpolate_1d(
    indices: np.ndarray,
    yvalues: np.ndarray,
    method: str = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: Literal["inside", "outside"] | None = None,
    fill_value: Any | None = None,
    bounds_error: bool = False,
    order: int | None = None,
    mask=None,
    **kwargs,
) -> None:
    """
    Logic for the 1-d interpolation. The input
    indices and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Notes
    -----
    Fills 'yvalues' in-place.  When ``mask`` is given, it is rewritten
    in-place to mark the positions that must remain NA afterwards.
    """
    # treat a caller-supplied mask as the authoritative NA mask
    if mask is not None:
        invalid = mask
    else:
        invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # no anchor points to interpolate from
        return

    if valid.all():
        # nothing missing, nothing to do
        return

    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))

    first_valid_index = find_valid_index(how="first", is_valid=valid)
    if first_valid_index is None:  # defensive; valid.any() above guarantees a hit
        first_valid_index = 0
    start_nans = set(range(first_valid_index))

    last_valid_index = find_valid_index(how="last", is_valid=valid)
    if last_valid_index is None:  # defensive; valid.any() above guarantees a hit
        last_valid_index = len(yvalues)
    end_nans = set(range(1 + last_valid_index, len(valid)))

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    preserve_nans: list | set
    if limit_direction == "forward":
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == "backward":
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == "inside":
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == "outside":
        # preserve NaNs on the inside
        mid_nans = all_nans - start_nans - end_nans
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list
    preserve_nans = sorted(preserve_nans)

    is_datetimelike = yvalues.dtype.kind in "mM"

    if is_datetimelike:
        # interpolate on the i8 view; restored via NaT.value below
        yvalues = yvalues.view("i8")

    if method in NP_METHODS:
        # np.interp requires sorted X values, #21037

        indexer = np.argsort(indices[valid])
        yvalues[invalid] = np.interp(
            indices[invalid], indices[valid][indexer], yvalues[valid][indexer]
        )
    else:
        yvalues[invalid] = _interpolate_scipy_wrapper(
            indices[valid],
            yvalues[valid],
            indices[invalid],
            method=method,
            fill_value=fill_value,
            bounds_error=bounds_error,
            order=order,
            **kwargs,
        )

    # everything was filled above; now re-establish NA at the positions
    # the limit/limit_area rules excluded
    if mask is not None:
        mask[:] = False
        mask[preserve_nans] = True
    elif is_datetimelike:
        yvalues[preserve_nans] = NaT.value
    else:
        yvalues[preserve_nans] = np.nan
    return
547
548
549def _interpolate_scipy_wrapper(
550 x: np.ndarray,
551 y: np.ndarray,
552 new_x: np.ndarray,
553 method: str,
554 fill_value=None,
555 bounds_error: bool = False,
556 order=None,
557 **kwargs,
558):
559 """
560 Passed off to scipy.interpolate.interp1d. method is scipy's kind.
561 Returns an array interpolated at new_x. Add any new methods to
562 the list in _clean_interp_method.
563 """
564 extra = f"{method} interpolation requires SciPy."
565 import_optional_dependency("scipy", extra=extra)
566 from scipy import interpolate
567
568 new_x = np.asarray(new_x)
569
570 # ignores some kwargs that could be passed along.
571 alt_methods = {
572 "barycentric": interpolate.barycentric_interpolate,
573 "krogh": interpolate.krogh_interpolate,
574 "from_derivatives": _from_derivatives,
575 "piecewise_polynomial": _from_derivatives,
576 "cubicspline": _cubicspline_interpolate,
577 "akima": _akima_interpolate,
578 "pchip": interpolate.pchip_interpolate,
579 }
580
581 interp1d_methods = [
582 "nearest",
583 "zero",
584 "slinear",
585 "quadratic",
586 "cubic",
587 "polynomial",
588 ]
589 if method in interp1d_methods:
590 if method == "polynomial":
591 kind = order
592 else:
593 kind = method
594 terp = interpolate.interp1d(
595 x, y, kind=kind, fill_value=fill_value, bounds_error=bounds_error
596 )
597 new_y = terp(new_x)
598 elif method == "spline":
599 # GH #10633, #24014
600 if isna(order) or (order <= 0):
601 raise ValueError(
602 f"order needs to be specified and greater than 0; got order: {order}"
603 )
604 terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs)
605 new_y = terp(new_x)
606 else:
607 # GH 7295: need to be able to write for some reason
608 # in some circumstances: check all three
609 if not x.flags.writeable:
610 x = x.copy()
611 if not y.flags.writeable:
612 y = y.copy()
613 if not new_x.flags.writeable:
614 new_x = new_x.copy()
615 terp = alt_methods[method]
616 new_y = terp(x, y, new_x, **kwargs)
617 return new_y
618
619
620def _from_derivatives(
621 xi: np.ndarray,
622 yi: np.ndarray,
623 x: np.ndarray,
624 order=None,
625 der: int | list[int] | None = 0,
626 extrapolate: bool = False,
627):
628 """
629 Convenience function for interpolate.BPoly.from_derivatives.
630
631 Construct a piecewise polynomial in the Bernstein basis, compatible
632 with the specified values and derivatives at breakpoints.
633
634 Parameters
635 ----------
636 xi : array-like
637 sorted 1D array of x-coordinates
638 yi : array-like or list of array-likes
639 yi[i][j] is the j-th derivative known at xi[i]
640 order: None or int or array-like of ints. Default: None.
641 Specifies the degree of local polynomials. If not None, some
642 derivatives are ignored.
643 der : int or list
644 How many derivatives to extract; None for all potentially nonzero
645 derivatives (that is a number equal to the number of points), or a
646 list of derivatives to extract. This number includes the function
647 value as 0th derivative.
648 extrapolate : bool, optional
649 Whether to extrapolate to ouf-of-bounds points based on first and last
650 intervals, or to return NaNs. Default: True.
651
652 See Also
653 --------
654 scipy.interpolate.BPoly.from_derivatives
655
656 Returns
657 -------
658 y : scalar or array-like
659 The result, of length R or length M or M by R.
660 """
661 from scipy import interpolate
662
663 # return the method for compat with scipy version & backwards compat
664 method = interpolate.BPoly.from_derivatives
665 m = method(xi, yi.reshape(-1, 1), orders=order, extrapolate=extrapolate)
666
667 return m(x)
668
669
670def _akima_interpolate(
671 xi: np.ndarray,
672 yi: np.ndarray,
673 x: np.ndarray,
674 der: int | list[int] | None = 0,
675 axis: AxisInt = 0,
676):
677 """
678 Convenience function for akima interpolation.
679 xi and yi are arrays of values used to approximate some function f,
680 with ``yi = f(xi)``.
681
682 See `Akima1DInterpolator` for details.
683
684 Parameters
685 ----------
686 xi : np.ndarray
687 A sorted list of x-coordinates, of length N.
688 yi : np.ndarray
689 A 1-D array of real values. `yi`'s length along the interpolation
690 axis must be equal to the length of `xi`. If N-D array, use axis
691 parameter to select correct axis.
692 x : np.ndarray
693 Of length M.
694 der : int, optional
695 How many derivatives to extract; None for all potentially
696 nonzero derivatives (that is a number equal to the number
697 of points), or a list of derivatives to extract. This number
698 includes the function value as 0th derivative.
699 axis : int, optional
700 Axis in the yi array corresponding to the x-coordinate values.
701
702 See Also
703 --------
704 scipy.interpolate.Akima1DInterpolator
705
706 Returns
707 -------
708 y : scalar or array-like
709 The result, of length R or length M or M by R,
710
711 """
712 from scipy import interpolate
713
714 P = interpolate.Akima1DInterpolator(xi, yi, axis=axis)
715
716 return P(x, nu=der)
717
718
719def _cubicspline_interpolate(
720 xi: np.ndarray,
721 yi: np.ndarray,
722 x: np.ndarray,
723 axis: AxisInt = 0,
724 bc_type: str | tuple[Any, Any] = "not-a-knot",
725 extrapolate=None,
726):
727 """
728 Convenience function for cubic spline data interpolator.
729
730 See `scipy.interpolate.CubicSpline` for details.
731
732 Parameters
733 ----------
734 xi : np.ndarray, shape (n,)
735 1-d array containing values of the independent variable.
736 Values must be real, finite and in strictly increasing order.
737 yi : np.ndarray
738 Array containing values of the dependent variable. It can have
739 arbitrary number of dimensions, but the length along ``axis``
740 (see below) must match the length of ``x``. Values must be finite.
741 x : np.ndarray, shape (m,)
742 axis : int, optional
743 Axis along which `y` is assumed to be varying. Meaning that for
744 ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
745 Default is 0.
746 bc_type : string or 2-tuple, optional
747 Boundary condition type. Two additional equations, given by the
748 boundary conditions, are required to determine all coefficients of
749 polynomials on each segment [2]_.
750 If `bc_type` is a string, then the specified condition will be applied
751 at both ends of a spline. Available conditions are:
752 * 'not-a-knot' (default): The first and second segment at a curve end
753 are the same polynomial. It is a good default when there is no
754 information on boundary conditions.
755 * 'periodic': The interpolated functions is assumed to be periodic
756 of period ``x[-1] - x[0]``. The first and last value of `y` must be
757 identical: ``y[0] == y[-1]``. This boundary condition will result in
758 ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``.
759 * 'clamped': The first derivative at curves ends are zero. Assuming
760 a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition.
761 * 'natural': The second derivative at curve ends are zero. Assuming
762 a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition.
763 If `bc_type` is a 2-tuple, the first and the second value will be
764 applied at the curve start and end respectively. The tuple values can
765 be one of the previously mentioned strings (except 'periodic') or a
766 tuple `(order, deriv_values)` allowing to specify arbitrary
767 derivatives at curve ends:
768 * `order`: the derivative order, 1 or 2.
769 * `deriv_value`: array-like containing derivative values, shape must
770 be the same as `y`, excluding ``axis`` dimension. For example, if
771 `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with
772 the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D
773 and have the shape (n0, n1).
774 extrapolate : {bool, 'periodic', None}, optional
775 If bool, determines whether to extrapolate to out-of-bounds points
776 based on first and last intervals, or to return NaNs. If 'periodic',
777 periodic extrapolation is used. If None (default), ``extrapolate`` is
778 set to 'periodic' for ``bc_type='periodic'`` and to True otherwise.
779
780 See Also
781 --------
782 scipy.interpolate.CubicHermiteSpline
783
784 Returns
785 -------
786 y : scalar or array-like
787 The result, of shape (m,)
788
789 References
790 ----------
791 .. [1] `Cubic Spline Interpolation
792 <https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_
793 on Wikiversity.
794 .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978.
795 """
796 from scipy import interpolate
797
798 P = interpolate.CubicSpline(
799 xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate
800 )
801
802 return P(x)
803
804
def _interpolate_with_limit_area(
    values: np.ndarray,
    method: Literal["pad", "backfill"],
    limit: int | None,
    limit_area: Literal["inside", "outside"],
) -> None:
    """
    Apply interpolation and limit_area logic to values along a to-be-specified axis.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str
        Interpolation method. Could be "bfill" or "pad"
    limit: int, optional
        Index limit on interpolation.
    limit_area: {'inside', 'outside'}
        Limit area for interpolation.

    Notes
    -----
    Modifies values in-place.

    Strategy: fill via pad_or_backfill_inplace, then restore NaN at the
    pre-fill NA positions that ``limit_area`` disallows.
    """

    # NA mask of the ORIGINAL values; reused below to undo disallowed fills
    invalid = isna(values)
    is_valid = ~invalid

    if not invalid.all():
        first = find_valid_index(how="first", is_valid=is_valid)
        if first is None:
            first = 0
        last = find_valid_index(how="last", is_valid=is_valid)
        if last is None:
            last = len(values)

        pad_or_backfill_inplace(
            values,
            method=method,
            limit=limit,
            limit_area=limit_area,
        )

        if limit_area == "inside":
            # fills between the outermost valid values are kept; everything
            # still flagged in `invalid` reverts to NaN below
            invalid[first : last + 1] = False
        elif limit_area == "outside":
            # fills outside the outermost valid values are kept
            invalid[:first] = invalid[last + 1 :] = False
        else:
            raise ValueError("limit_area should be 'inside' or 'outside'")

        values[invalid] = np.nan
856
857
858def pad_or_backfill_inplace(
859 values: np.ndarray,
860 method: Literal["pad", "backfill"] = "pad",
861 axis: AxisInt = 0,
862 limit: int | None = None,
863 limit_area: Literal["inside", "outside"] | None = None,
864) -> None:
865 """
866 Perform an actual interpolation of values, values will be make 2-d if
867 needed fills inplace, returns the result.
868
869 Parameters
870 ----------
871 values: np.ndarray
872 Input array.
873 method: str, default "pad"
874 Interpolation method. Could be "bfill" or "pad"
875 axis: 0 or 1
876 Interpolation axis
877 limit: int, optional
878 Index limit on interpolation.
879 limit_area: str, optional
880 Limit area for interpolation. Can be "inside" or "outside"
881
882 Notes
883 -----
884 Modifies values in-place.
885 """
886 transf = (lambda x: x) if axis == 0 else (lambda x: x.T)
887
888 # reshape a 1 dim if needed
889 if values.ndim == 1:
890 if axis != 0: # pragma: no cover
891 raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
892 values = values.reshape(tuple((1,) + values.shape))
893
894 method = clean_fill_method(method)
895 tvalues = transf(values)
896
897 func = get_fill_func(method, ndim=2)
898 # _pad_2d and _backfill_2d both modify tvalues inplace
899 func(tvalues, limit=limit, limit_area=limit_area)
900
901
902def _fillna_prep(
903 values, mask: npt.NDArray[np.bool_] | None = None
904) -> npt.NDArray[np.bool_]:
905 # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d
906
907 if mask is None:
908 mask = isna(values)
909
910 return mask
911
912
913def _datetimelike_compat(func: F) -> F:
914 """
915 Wrapper to handle datetime64 and timedelta64 dtypes.
916 """
917
918 @wraps(func)
919 def new_func(
920 values,
921 limit: int | None = None,
922 limit_area: Literal["inside", "outside"] | None = None,
923 mask=None,
924 ):
925 if needs_i8_conversion(values.dtype):
926 if mask is None:
927 # This needs to occur before casting to int64
928 mask = isna(values)
929
930 result, mask = func(
931 values.view("i8"), limit=limit, limit_area=limit_area, mask=mask
932 )
933 return result.view(values.dtype), mask
934
935 return func(values, limit=limit, limit_area=limit_area, mask=mask)
936
937 return cast(F, new_func)
938
939
@_datetimelike_compat
def _pad_1d(
    values: np.ndarray,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    """
    Forward-fill NA entries of a 1-d array in place.

    Parameters
    ----------
    values : np.ndarray
        Array to fill; modified in place.
    limit : int, optional
        Maximum number of consecutive NA values to fill.
    limit_area : {"inside", "outside"}, optional
        Restrict filling relative to the outermost valid values.
    mask : np.ndarray[bool], optional
        Precomputed NA mask; computed from ``values`` when None.

    Returns
    -------
    tuple
        ``(values, mask)``.
    """
    mask = _fillna_prep(values, mask)
    if limit_area is not None and not mask.all():
        # remove positions from the mask that limit_area excludes from filling
        _fill_limit_area_1d(mask, limit_area)
    algos.pad_inplace(values, mask, limit=limit)
    return values, mask
952
953
@_datetimelike_compat
def _backfill_1d(
    values: np.ndarray,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    """
    Back-fill NA entries of a 1-d array in place.

    Parameters
    ----------
    values : np.ndarray
        Array to fill; modified in place.
    limit : int, optional
        Maximum number of consecutive NA values to fill.
    limit_area : {"inside", "outside"}, optional
        Restrict filling relative to the outermost valid values.
    mask : np.ndarray[bool], optional
        Precomputed NA mask; computed from ``values`` when None.

    Returns
    -------
    tuple
        ``(values, mask)``.
    """
    mask = _fillna_prep(values, mask)
    if limit_area is not None and not mask.all():
        # remove positions from the mask that limit_area excludes from filling
        _fill_limit_area_1d(mask, limit_area)
    algos.backfill_inplace(values, mask, limit=limit)
    return values, mask
966
967
@_datetimelike_compat
def _pad_2d(
    values: np.ndarray,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
):
    """
    Forward-fill NA entries of a 2-d array in place via algos.pad_2d_inplace.

    Returns ``(values, mask)``.  NOTE(review): unlike _pad_1d, there is no
    ``mask.all()`` short-circuit before the limit_area adjustment here.
    """
    mask = _fillna_prep(values, mask)
    if limit_area is not None:
        _fill_limit_area_2d(mask, limit_area)

    if values.size:
        algos.pad_2d_inplace(values, mask, limit=limit)
    else:
        # no-op on empty arrays; explicit branch kept for test coverage
        pass
    return values, mask
985
986
@_datetimelike_compat
def _backfill_2d(
    values,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
):
    """
    Back-fill NA entries of a 2-d array in place via algos.backfill_2d_inplace.

    Returns ``(values, mask)``.
    """
    mask = _fillna_prep(values, mask)
    if limit_area is not None:
        _fill_limit_area_2d(mask, limit_area)

    if values.size:
        algos.backfill_2d_inplace(values, mask, limit=limit)
    else:
        # no-op on empty arrays; explicit branch kept for test coverage
        pass
    return values, mask
1004
1005
1006def _fill_limit_area_1d(
1007 mask: npt.NDArray[np.bool_], limit_area: Literal["outside", "inside"]
1008) -> None:
1009 """Prepare 1d mask for ffill/bfill with limit_area.
1010
1011 Caller is responsible for checking at least one value of mask is False.
1012 When called, mask will no longer faithfully represent when
1013 the corresponding are NA or not.
1014
1015 Parameters
1016 ----------
1017 mask : np.ndarray[bool, ndim=1]
1018 Mask representing NA values when filling.
1019 limit_area : { "outside", "inside" }
1020 Whether to limit filling to outside or inside the outer most non-NA value.
1021 """
1022 neg_mask = ~mask
1023 first = neg_mask.argmax()
1024 last = len(neg_mask) - neg_mask[::-1].argmax() - 1
1025 if limit_area == "inside":
1026 mask[:first] = False
1027 mask[last + 1 :] = False
1028 elif limit_area == "outside":
1029 mask[first + 1 : last] = False
1030
1031
1032def _fill_limit_area_2d(
1033 mask: npt.NDArray[np.bool_], limit_area: Literal["outside", "inside"]
1034) -> None:
1035 """Prepare 2d mask for ffill/bfill with limit_area.
1036
1037 When called, mask will no longer faithfully represent when
1038 the corresponding are NA or not.
1039
1040 Parameters
1041 ----------
1042 mask : np.ndarray[bool, ndim=1]
1043 Mask representing NA values when filling.
1044 limit_area : { "outside", "inside" }
1045 Whether to limit filling to outside or inside the outer most non-NA value.
1046 """
1047 neg_mask = ~mask.T
1048 if limit_area == "outside":
1049 # Identify inside
1050 la_mask = (
1051 np.maximum.accumulate(neg_mask, axis=0)
1052 & np.maximum.accumulate(neg_mask[::-1], axis=0)[::-1]
1053 )
1054 else:
1055 # Identify outside
1056 la_mask = (
1057 ~np.maximum.accumulate(neg_mask, axis=0)
1058 | ~np.maximum.accumulate(neg_mask[::-1], axis=0)[::-1]
1059 )
1060 mask[la_mask.T] = False
1061
1062
1063_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}
1064
1065
1066def get_fill_func(method, ndim: int = 1):
1067 method = clean_fill_method(method)
1068 if ndim == 1:
1069 return _fill_methods[method]
1070 return {"pad": _pad_2d, "backfill": _backfill_2d}[method]
1071
1072
1073def clean_reindex_fill_method(method) -> ReindexMethod | None:
1074 if method is None:
1075 return None
1076 return clean_fill_method(method, allow_nearest=True)
1077
1078
def _interp_limit(
    invalid: npt.NDArray[np.bool_], fw_limit: int | None, bw_limit: int | None
):
    """
    Get indexers of values that won't be filled
    because they exceed the limits.

    Parameters
    ----------
    invalid : np.ndarray[bool]
    fw_limit : int or None
        forward limit to index
    bw_limit : int or None
        backward limit to index

    Returns
    -------
    set of indexers

    Notes
    -----
    This is equivalent to the more readable, but slower

    .. code-block:: python

        def _interp_limit(invalid, fw_limit, bw_limit):
            for x in np.where(invalid)[0]:
                if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                    yield x
    """
    # handle forward first; the backward direction is the same except
    # 1. operate on the reversed array
    # 2. subtract the returned indices from N - 1
    N = len(invalid)
    f_idx = set()
    b_idx = set()

    def inner(invalid, limit: int):
        # indices that are more than `limit` past the last valid value:
        # a NaN at position i is beyond the limit when the window of
        # limit+1 entries ending at i is all-NaN; the second term catches
        # leading NaNs (no prior valid value at all)
        limit = min(limit, N)
        windowed = _rolling_window(invalid, limit + 1).all(1)
        idx = set(np.where(windowed)[0] + limit) | set(
            np.where((~invalid[: limit + 1]).cumsum() == 0)[0]
        )
        return idx

    if fw_limit is not None:
        if fw_limit == 0:
            # limit of 0 forbids forward filling entirely: every NaN exceeds it
            f_idx = set(np.where(invalid)[0])
        else:
            f_idx = inner(invalid, fw_limit)

    if bw_limit is not None:
        if bw_limit == 0:
            # then we don't even need to care about backwards
            # just use forwards
            return f_idx
        else:
            # reverse, reuse the forward logic, then map indices back
            b_idx_inv = list(inner(invalid[::-1], bw_limit))
            b_idx = set(N - 1 - np.asarray(b_idx_inv))
            if fw_limit == 0:
                return b_idx

    # a value stays NaN only when it exceeds BOTH directional limits
    return f_idx & b_idx
1142
1143
1144def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.bool_]:
1145 """
1146 [True, True, False, True, False], 2 ->
1147
1148 [
1149 [True, True],
1150 [True, False],
1151 [False, True],
1152 [True, False],
1153 ]
1154 """
1155 # https://stackoverflow.com/a/6811241
1156 shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
1157 strides = a.strides + (a.strides[-1],)
1158 return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)