Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/missing.py: 15%


1""" 

2Routines for filling missing data. 

3""" 

4from __future__ import annotations 

5 

6from functools import ( 

7 partial, 

8 wraps, 

9) 

10from typing import ( 

11 TYPE_CHECKING, 

12 Any, 

13 cast, 

14) 

15 

16import numpy as np 

17 

18from pandas._libs import ( 

19 NaT, 

20 algos, 

21 lib, 

22) 

23from pandas._typing import ( 

24 ArrayLike, 

25 Axis, 

26 AxisInt, 

27 F, 

28 npt, 

29) 

30from pandas.compat._optional import import_optional_dependency 

31 

32from pandas.core.dtypes.cast import infer_dtype_from 

33from pandas.core.dtypes.common import ( 

34 is_array_like, 

35 is_numeric_v_string_like, 

36 is_object_dtype, 

37 needs_i8_conversion, 

38) 

39from pandas.core.dtypes.missing import ( 

40 is_valid_na_for_dtype, 

41 isna, 

42 na_value_for_dtype, 

43) 

44 

45if TYPE_CHECKING: 

46 from pandas import Index 

47 

48 

49def check_value_size(value, mask: npt.NDArray[np.bool_], length: int): 

50 """ 

51 Validate the size of the values passed to ExtensionArray.fillna. 

52 """ 

53 if is_array_like(value): 

54 if len(value) != length: 

55 raise ValueError( 

56 f"Length of 'value' does not match. Got ({len(value)}) " 

57 f" expected {length}" 

58 ) 

59 value = value[mask] 

60 

61 return value 

62 

63 
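# Illustrative usage of check_value_size (not part of the original source; a
# minimal sketch assuming NumPy inputs). An array-like 'value' must match the
# array length and is then subset to the positions being filled, while a
# mismatched length raises ValueError:
#
# >>> import numpy as np
# >>> value = np.array([10, 20, 30])
# >>> mask = np.array([True, False, True])
# >>> check_value_size(value, mask, length=3)
# array([10, 30])

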

def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True

    Parameters
    ----------
    arr : ArrayLike
    values_to_mask: list, tuple, or scalar

    Returns
    -------
    np.ndarray[bool]
    """
    # When called from Block.replace/replace_list, values_to_mask is a scalar
    # known to be holdable by arr.
    # When called from Series._single_replace, values_to_mask is tuple or list
    dtype, values_to_mask = infer_dtype_from(values_to_mask)
    # error: Argument "dtype" to "array" has incompatible type "Union[dtype[Any],
    # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
    # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
    # _DTypeDict, Tuple[Any, Any]]]"
    values_to_mask = np.array(values_to_mask, dtype=dtype)  # type: ignore[arg-type]

    potential_na = False
    if is_object_dtype(arr):
        # pre-compute mask to avoid comparison to NA
        potential_na = True
        arr_mask = ~isna(arr)

    na_mask = isna(values_to_mask)
    nonna = values_to_mask[~na_mask]

    # GH 21977
    mask = np.zeros(arr.shape, dtype=bool)
    for x in nonna:
        if is_numeric_v_string_like(arr, x):
            # GH#29553 prevent numpy deprecation warnings
            pass
        else:
            if potential_na:
                new_mask = np.zeros(arr.shape, dtype=np.bool_)
                new_mask[arr_mask] = arr[arr_mask] == x
            else:
                new_mask = arr == x

            if not isinstance(new_mask, np.ndarray):
                # usually BooleanArray
                new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
            mask |= new_mask

    if na_mask.any():
        mask |= isna(arr)

    return mask


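# Illustrative usage of mask_missing (not part of the original source; a
# minimal sketch assuming a plain NumPy float array). A NaN among
# values_to_mask marks the NA positions of 'arr' as well:
#
# >>> import numpy as np
# >>> arr = np.array([1.0, 2.0, np.nan, 4.0])
# >>> mask_missing(arr, [2.0, np.nan])
# array([False,  True,  True, False])

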

def clean_fill_method(method: str | None, allow_nearest: bool = False):
    # asfreq is compat for resampling
    if method in [None, "asfreq"]:
        return None

    if isinstance(method, str):
        method = method.lower()
        if method == "ffill":
            method = "pad"
        elif method == "bfill":
            method = "backfill"

    valid_methods = ["pad", "backfill"]
    expecting = "pad (ffill) or backfill (bfill)"
    if allow_nearest:
        valid_methods.append("nearest")
        expecting = "pad (ffill), backfill (bfill) or nearest"
    if method not in valid_methods:
        raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}")
    return method


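# Illustrative behaviour of clean_fill_method (not part of the original
# source; a minimal sketch). Aliases are normalized and anything outside the
# accepted set raises ValueError:
#
# >>> clean_fill_method("ffill")
# 'pad'
# >>> clean_fill_method("bfill")
# 'backfill'
# >>> clean_fill_method("nearest", allow_nearest=True)
# 'nearest'
# >>> clean_fill_method(None) is None
# True

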

# interpolation methods that dispatch to np.interp

NP_METHODS = ["linear", "time", "index", "values"]

# interpolation methods that dispatch to _interpolate_scipy_wrapper

SP_METHODS = [
    "nearest",
    "zero",
    "slinear",
    "quadratic",
    "cubic",
    "barycentric",
    "krogh",
    "spline",
    "polynomial",
    "from_derivatives",
    "piecewise_polynomial",
    "pchip",
    "akima",
    "cubicspline",
]


def clean_interp_method(method: str, index: Index, **kwargs) -> str:
    order = kwargs.get("order")

    if method in ("spline", "polynomial") and order is None:
        raise ValueError("You must specify the order of the spline or polynomial.")

    valid = NP_METHODS + SP_METHODS
    if method not in valid:
        raise ValueError(f"method must be one of {valid}. Got '{method}' instead.")

    if method in ("krogh", "piecewise_polynomial", "pchip"):
        if not index.is_monotonic_increasing:
            raise ValueError(
                f"{method} interpolation requires that the index be monotonic."
            )

    return method


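# Illustrative behaviour of clean_interp_method (not part of the original
# source; a minimal sketch assuming a small RangeIndex):
#
# >>> import pandas as pd
# >>> idx = pd.RangeIndex(4)
# >>> clean_interp_method("linear", idx)
# 'linear'
# >>> clean_interp_method("polynomial", idx, order=2)
# 'polynomial'
# >>> clean_interp_method("polynomial", idx)  # missing order
# Traceback (most recent call last):
#     ...
# ValueError: You must specify the order of the spline or polynomial.

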

def find_valid_index(
    values, *, how: str, is_valid: npt.NDArray[np.bool_]
) -> int | None:
    """
    Retrieve the index of the first or last valid value, depending on 'how'.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    how : {'first', 'last'}
        Use this parameter to change between the first or last valid index.
    is_valid: np.ndarray
        Mask to find na_values.

    Returns
    -------
    int or None
    """
    assert how in ["first", "last"]

    if len(values) == 0:  # early stop
        return None

    if values.ndim == 2:
        is_valid = is_valid.any(axis=1)  # reduce axis 1

    if how == "first":
        idxpos = is_valid[::].argmax()

    elif how == "last":
        idxpos = len(values) - 1 - is_valid[::-1].argmax()

    chk_notna = is_valid[idxpos]

    if not chk_notna:
        return None
    # Incompatible return value type (got "signedinteger[Any]",
    # expected "Optional[int]")
    return idxpos  # type: ignore[return-value]


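# Illustrative usage of find_valid_index (not part of the original source; a
# minimal sketch). 'is_valid' is typically ~isna(values):
#
# >>> import numpy as np
# >>> values = np.array([np.nan, 1.0, 2.0, np.nan])
# >>> is_valid = ~np.isnan(values)
# >>> find_valid_index(values, how="first", is_valid=is_valid)
# 1
# >>> find_valid_index(values, how="last", is_valid=is_valid)
# 2
# >>> find_valid_index(values[:0], how="first", is_valid=is_valid[:0]) is None
# True

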

def interpolate_array_2d(
    data: np.ndarray,
    method: str = "pad",
    axis: AxisInt = 0,
    index: Index | None = None,
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    coerce: bool = False,
    downcast: str | None = None,
    **kwargs,
) -> None:
    """
    Wrapper to dispatch to either interpolate_2d or _interpolate_2d_with_fill.

    Notes
    -----
    Alters 'data' in-place.
    """
    try:
        m = clean_fill_method(method)
    except ValueError:
        m = None

    if m is not None:
        if fill_value is not None:
            # similar to validate_fillna_kwargs
            raise ValueError("Cannot pass both fill_value and method")

        interpolate_2d(
            data,
            method=m,
            axis=axis,
            limit=limit,
            limit_area=limit_area,
        )
    else:
        assert index is not None  # for mypy

        _interpolate_2d_with_fill(
            data=data,
            index=index,
            axis=axis,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            **kwargs,
        )


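# Illustrative usage of interpolate_array_2d (not part of the original source;
# a minimal sketch). For fill methods such as "pad" no index is required and
# 'data' is filled in-place:
#
# >>> import numpy as np
# >>> data = np.array([[1.0, np.nan, np.nan, 4.0]])
# >>> interpolate_array_2d(data, method="pad", limit=1)
# >>> data
# array([[ 1.,  1., nan,  4.]])

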

def _interpolate_2d_with_fill(
    data: np.ndarray,  # floating dtype
    index: Index,
    axis: AxisInt,
    method: str = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    **kwargs,
) -> None:
    """
    Column-wise application of _interpolate_1d.

    Notes
    -----
    Alters 'data' in-place.

    The signature does differ from _interpolate_1d because it only
    includes what is needed for Block.interpolate.
    """
    # validate the interp method
    clean_interp_method(method, index, **kwargs)

    if is_valid_na_for_dtype(fill_value, data.dtype):
        fill_value = na_value_for_dtype(data.dtype, compat=False)

    if method == "time":
        if not needs_i8_conversion(index.dtype):
            raise ValueError(
                "time-weighted interpolation only works "
                "on Series or DataFrames with a "
                "DatetimeIndex"
            )
        method = "values"

    valid_limit_directions = ["forward", "backward", "both"]
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError(
            "Invalid limit_direction: expecting one of "
            f"{valid_limit_directions}, got '{limit_direction}'."
        )

    if limit_area is not None:
        valid_limit_areas = ["inside", "outside"]
        limit_area = limit_area.lower()
        if limit_area not in valid_limit_areas:
            raise ValueError(
                f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
                f"{limit_area}."
            )

    # default limit is unlimited GH #16282
    limit = algos.validate_limit(nobs=None, limit=limit)

    indices = _index_to_interp_indices(index, method)

    def func(yvalues: np.ndarray) -> None:
        # process 1-d slices in the axis direction

        _interpolate_1d(
            indices=indices,
            yvalues=yvalues,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            bounds_error=False,
            **kwargs,
        )

    # error: Argument 1 to "apply_along_axis" has incompatible type
    # "Callable[[ndarray[Any, Any]], None]"; expected "Callable[...,
    # Union[_SupportsArray[dtype[<nothing>]], Sequence[_SupportsArray
    # [dtype[<nothing>]]], Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]],
    # Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]],
    # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]]]]]"
    np.apply_along_axis(func, axis, data)  # type: ignore[arg-type]



def _index_to_interp_indices(index: Index, method: str) -> np.ndarray:
    """
    Convert Index to ndarray of indices to pass to NumPy/SciPy.
    """
    xarr = index._values
    if needs_i8_conversion(xarr.dtype):
        # GH#1646 for dt64tz
        xarr = xarr.view("i8")

    if method == "linear":
        inds = xarr
        inds = cast(np.ndarray, inds)
    else:
        inds = np.asarray(xarr)

        if method in ("values", "index"):
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)

    return inds



def _interpolate_1d(
    indices: np.ndarray,
    yvalues: np.ndarray,
    method: str | None = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    bounds_error: bool = False,
    order: int | None = None,
    **kwargs,
) -> None:
    """
    Logic for the 1-d interpolation. The input
    indices and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Notes
    -----
    Fills 'yvalues' in-place.
    """

    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        return

    if valid.all():
        return

    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))

    first_valid_index = find_valid_index(yvalues, how="first", is_valid=valid)
    if first_valid_index is None:  # no nan found in start
        first_valid_index = 0
    start_nans = set(range(first_valid_index))

    last_valid_index = find_valid_index(yvalues, how="last", is_valid=valid)
    if last_valid_index is None:  # no nan found in end
        last_valid_index = len(yvalues)
    end_nans = set(range(1 + last_valid_index, len(valid)))

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    preserve_nans: list | set
    if limit_direction == "forward":
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == "backward":
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == "inside":
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == "outside":
        # preserve NaNs on the inside
        mid_nans = all_nans - start_nans - end_nans
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list
    preserve_nans = sorted(preserve_nans)

    is_datetimelike = needs_i8_conversion(yvalues.dtype)

    if is_datetimelike:
        yvalues = yvalues.view("i8")

    if method in NP_METHODS:
        # np.interp requires sorted X values, #21037

        indexer = np.argsort(indices[valid])
        yvalues[invalid] = np.interp(
            indices[invalid], indices[valid][indexer], yvalues[valid][indexer]
        )
    else:
        yvalues[invalid] = _interpolate_scipy_wrapper(
            indices[valid],
            yvalues[valid],
            indices[invalid],
            method=method,
            fill_value=fill_value,
            bounds_error=bounds_error,
            order=order,
            **kwargs,
        )

    if is_datetimelike:
        yvalues[preserve_nans] = NaT.value
    else:
        yvalues[preserve_nans] = np.nan
    return


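# Illustrative usage of _interpolate_1d (not part of the original source; a
# minimal sketch). Interpolation happens in-place on 'yvalues'; with the
# default forward limit_direction, the leading NaN is preserved:
#
# >>> import numpy as np
# >>> indices = np.arange(5)
# >>> yvalues = np.array([np.nan, 1.0, np.nan, 3.0, np.nan])
# >>> _interpolate_1d(indices, yvalues, method="linear")
# >>> yvalues
# array([nan,  1.,  2.,  3.,  3.])

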

def _interpolate_scipy_wrapper(
    x,
    y,
    new_x,
    method,
    fill_value=None,
    bounds_error: bool = False,
    order=None,
    **kwargs,
):
    """
    Passed off to scipy.interpolate.interp1d. method is scipy's kind.
    Returns an array interpolated at new_x. Add any new methods to
    the list in clean_interp_method.
    """
    extra = f"{method} interpolation requires SciPy."
    import_optional_dependency("scipy", extra=extra)
    from scipy import interpolate

    new_x = np.asarray(new_x)

    # ignores some kwargs that could be passed along.
    alt_methods = {
        "barycentric": interpolate.barycentric_interpolate,
        "krogh": interpolate.krogh_interpolate,
        "from_derivatives": _from_derivatives,
        "piecewise_polynomial": _from_derivatives,
    }

    if getattr(x, "_is_all_dates", False):
        # GH 5975, scipy.interp1d can't handle datetime64s
        x, new_x = x._values.astype("i8"), new_x.astype("i8")

    if method == "pchip":
        alt_methods["pchip"] = interpolate.pchip_interpolate
    elif method == "akima":
        alt_methods["akima"] = _akima_interpolate
    elif method == "cubicspline":
        alt_methods["cubicspline"] = _cubicspline_interpolate

    interp1d_methods = [
        "nearest",
        "zero",
        "slinear",
        "quadratic",
        "cubic",
        "polynomial",
    ]
    if method in interp1d_methods:
        if method == "polynomial":
            method = order
        terp = interpolate.interp1d(
            x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error
        )
        new_y = terp(new_x)
    elif method == "spline":
        # GH #10633, #24014
        if isna(order) or (order <= 0):
            raise ValueError(
                f"order needs to be specified and greater than 0; got order: {order}"
            )
        terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs)
        new_y = terp(new_x)
    else:
        # GH 7295: need to be able to write for some reason
        # in some circumstances: check all three
        if not x.flags.writeable:
            x = x.copy()
        if not y.flags.writeable:
            y = y.copy()
        if not new_x.flags.writeable:
            new_x = new_x.copy()
        method = alt_methods[method]
        new_y = method(x, y, new_x, **kwargs)
    return new_y



def _from_derivatives(
    xi, yi, x, order=None, der: int | list[int] | None = 0, extrapolate: bool = False
):
    """
    Convenience function for interpolate.BPoly.from_derivatives.

    Construct a piecewise polynomial in the Bernstein basis, compatible
    with the specified values and derivatives at breakpoints.

    Parameters
    ----------
    xi : array-like
        sorted 1D array of x-coordinates
    yi : array-like or list of array-likes
        yi[i][j] is the j-th derivative known at xi[i]
    order: None or int or array-like of ints. Default: None.
        Specifies the degree of local polynomials. If not None, some
        derivatives are ignored.
    der : int or list
        How many derivatives to extract; None for all potentially nonzero
        derivatives (that is a number equal to the number of points), or a
        list of derivatives to extract. This number includes the function
        value as 0th derivative.
    extrapolate : bool, optional
        Whether to extrapolate to out-of-bounds points based on first and last
        intervals, or to return NaNs. Default: False.

    See Also
    --------
    scipy.interpolate.BPoly.from_derivatives

    Returns
    -------
    y : scalar or array-like
        The result, of length R or length M or M by R.
    """
    from scipy import interpolate

    # return the method for compat with scipy version & backwards compat
    method = interpolate.BPoly.from_derivatives
    m = method(xi, yi.reshape(-1, 1), orders=order, extrapolate=extrapolate)

    return m(x)



def _akima_interpolate(xi, yi, x, der: int | list[int] | None = 0, axis: AxisInt = 0):
    """
    Convenience function for akima interpolation.
    xi and yi are arrays of values used to approximate some function f,
    with ``yi = f(xi)``.

    See `Akima1DInterpolator` for details.

    Parameters
    ----------
    xi : array-like
        A sorted list of x-coordinates, of length N.
    yi : array-like
        A 1-D array of real values. `yi`'s length along the interpolation
        axis must be equal to the length of `xi`. If N-D array, use axis
        parameter to select correct axis.
    x : scalar or array-like
        Of length M.
    der : int, optional
        How many derivatives to extract; None for all potentially
        nonzero derivatives (that is a number equal to the number
        of points), or a list of derivatives to extract. This number
        includes the function value as 0th derivative.
    axis : int, optional
        Axis in the yi array corresponding to the x-coordinate values.

    See Also
    --------
    scipy.interpolate.Akima1DInterpolator

    Returns
    -------
    y : scalar or array-like
        The result, of length R or length M or M by R.
    """
    from scipy import interpolate

    P = interpolate.Akima1DInterpolator(xi, yi, axis=axis)

    return P(x, nu=der)



def _cubicspline_interpolate(
    xi,
    yi,
    x,
    axis: AxisInt = 0,
    bc_type: str | tuple[Any, Any] = "not-a-knot",
    extrapolate=None,
):
    """
    Convenience function for cubic spline data interpolator.

    See `scipy.interpolate.CubicSpline` for details.

    Parameters
    ----------
    xi : array-like, shape (n,)
        1-d array containing values of the independent variable.
        Values must be real, finite and in strictly increasing order.
    yi : array-like
        Array containing values of the dependent variable. It can have
        arbitrary number of dimensions, but the length along ``axis``
        (see below) must match the length of ``x``. Values must be finite.
    x : scalar or array-like, shape (m,)
    axis : int, optional
        Axis along which `y` is assumed to be varying. Meaning that for
        ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
        Default is 0.
    bc_type : string or 2-tuple, optional
        Boundary condition type. Two additional equations, given by the
        boundary conditions, are required to determine all coefficients of
        polynomials on each segment [2]_.
        If `bc_type` is a string, then the specified condition will be applied
        at both ends of a spline. Available conditions are:
        * 'not-a-knot' (default): The first and second segment at a curve end
          are the same polynomial. It is a good default when there is no
          information on boundary conditions.
        * 'periodic': The interpolated function is assumed to be periodic
          of period ``x[-1] - x[0]``. The first and last value of `y` must be
          identical: ``y[0] == y[-1]``. This boundary condition will result in
          ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``.
        * 'clamped': The first derivatives at curve ends are zero. Assuming
          a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition.
        * 'natural': The second derivatives at curve ends are zero. Assuming
          a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition.
        If `bc_type` is a 2-tuple, the first and the second value will be
        applied at the curve start and end respectively. The tuple values can
        be one of the previously mentioned strings (except 'periodic') or a
        tuple `(order, deriv_values)` allowing to specify arbitrary
        derivatives at curve ends:
        * `order`: the derivative order, 1 or 2.
        * `deriv_value`: array-like containing derivative values, shape must
          be the same as `y`, excluding ``axis`` dimension. For example, if
          `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with
          the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D
          and have the shape (n0, n1).
    extrapolate : {bool, 'periodic', None}, optional
        If bool, determines whether to extrapolate to out-of-bounds points
        based on first and last intervals, or to return NaNs. If 'periodic',
        periodic extrapolation is used. If None (default), ``extrapolate`` is
        set to 'periodic' for ``bc_type='periodic'`` and to True otherwise.

    See Also
    --------
    scipy.interpolate.CubicHermiteSpline

    Returns
    -------
    y : scalar or array-like
        The result, of shape (m,)

    References
    ----------
    .. [1] `Cubic Spline Interpolation
            <https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_
            on Wikiversity.
    .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978.
    """
    from scipy import interpolate

    P = interpolate.CubicSpline(
        xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate
    )

    return P(x)



def _interpolate_with_limit_area(
    values: np.ndarray, method: str, limit: int | None, limit_area: str | None
) -> None:
    """
    Apply interpolation and limit_area logic to values along a to-be-specified axis.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str
        Interpolation method. Could be "bfill" or "pad"
    limit: int, optional
        Index limit on interpolation.
    limit_area: str
        Limit area for interpolation. Can be "inside" or "outside"

    Notes
    -----
    Modifies values in-place.
    """

    invalid = isna(values)
    is_valid = ~invalid

    if not invalid.all():
        first = find_valid_index(values, how="first", is_valid=is_valid)
        if first is None:
            first = 0
        last = find_valid_index(values, how="last", is_valid=is_valid)
        if last is None:
            last = len(values)

        interpolate_2d(
            values,
            method=method,
            limit=limit,
        )

        if limit_area == "inside":
            invalid[first : last + 1] = False
        elif limit_area == "outside":
            invalid[:first] = invalid[last + 1 :] = False

        values[invalid] = np.nan



def interpolate_2d(
    values: np.ndarray,
    method: str = "pad",
    axis: Axis = 0,
    limit: int | None = None,
    limit_area: str | None = None,
) -> None:
    """
    Perform an actual interpolation of values. Values will be made 2-D if
    needed, and the fill happens in-place.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str, default "pad"
        Interpolation method. Could be "bfill" or "pad"
    axis: 0 or 1
        Interpolation axis
    limit: int, optional
        Index limit on interpolation.
    limit_area: str, optional
        Limit area for interpolation. Can be "inside" or "outside"

    Notes
    -----
    Modifies values in-place.
    """
    if limit_area is not None:
        np.apply_along_axis(
            # error: Argument 1 to "apply_along_axis" has incompatible type
            # "partial[None]"; expected
            # "Callable[..., Union[_SupportsArray[dtype[<nothing>]],
            # Sequence[_SupportsArray[dtype[<nothing>]]],
            # Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]],
            # Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]],
            # Sequence[Sequence[Sequence[Sequence[_
            # SupportsArray[dtype[<nothing>]]]]]]]]"
            partial(  # type: ignore[arg-type]
                _interpolate_with_limit_area,
                method=method,
                limit=limit,
                limit_area=limit_area,
            ),
            # error: Argument 2 to "apply_along_axis" has incompatible type
            # "Union[str, int]"; expected "SupportsIndex"
            axis,  # type: ignore[arg-type]
            values,
        )
        return

    transf = (lambda x: x) if axis == 0 else (lambda x: x.T)

    # reshape a 1 dim if needed
    if values.ndim == 1:
        if axis != 0:  # pragma: no cover
            raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
        values = values.reshape(tuple((1,) + values.shape))

    method = clean_fill_method(method)
    tvalues = transf(values)

    # _pad_2d and _backfill_2d both modify tvalues inplace
    if method == "pad":
        _pad_2d(tvalues, limit=limit)
    else:
        _backfill_2d(tvalues, limit=limit)

    return


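# Illustrative usage of interpolate_2d (not part of the original source; a
# minimal sketch). The fill runs along each row and modifies 'values'
# in-place; with limit_area="inside", the leading and trailing NaNs stay NaN:
#
# >>> import numpy as np
# >>> values = np.array([np.nan, 1.0, np.nan, 3.0, np.nan])
# >>> interpolate_2d(values, method="pad", limit_area="inside")
# >>> values
# array([nan,  1.,  1.,  3., nan])

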

def _fillna_prep(
    values, mask: npt.NDArray[np.bool_] | None = None
) -> npt.NDArray[np.bool_]:
    # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d

    if mask is None:
        mask = isna(values)

    mask = mask.view(np.uint8)
    return mask


def _datetimelike_compat(func: F) -> F:
    """
    Wrapper to handle datetime64 and timedelta64 dtypes.
    """

    @wraps(func)
    def new_func(values, limit=None, mask=None):
        if needs_i8_conversion(values.dtype):
            if mask is None:
                # This needs to occur before casting to int64
                mask = isna(values)

            result, mask = func(values.view("i8"), limit=limit, mask=mask)
            return result.view(values.dtype), mask

        return func(values, limit=limit, mask=mask)

    return cast(F, new_func)



@_datetimelike_compat
def _pad_1d(
    values: np.ndarray,
    limit: int | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    mask = _fillna_prep(values, mask)
    algos.pad_inplace(values, mask, limit=limit)
    return values, mask


@_datetimelike_compat
def _backfill_1d(
    values: np.ndarray,
    limit: int | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    mask = _fillna_prep(values, mask)
    algos.backfill_inplace(values, mask, limit=limit)
    return values, mask


@_datetimelike_compat
def _pad_2d(values: np.ndarray, limit=None, mask: npt.NDArray[np.bool_] | None = None):
    mask = _fillna_prep(values, mask)

    if np.all(values.shape):
        algos.pad_2d_inplace(values, mask, limit=limit)
    else:
        # for test coverage
        pass
    return values, mask


@_datetimelike_compat
def _backfill_2d(values, limit=None, mask: npt.NDArray[np.bool_] | None = None):
    mask = _fillna_prep(values, mask)

    if np.all(values.shape):
        algos.backfill_2d_inplace(values, mask, limit=limit)
    else:
        # for test coverage
        pass
    return values, mask



_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}


def get_fill_func(method, ndim: int = 1):
    method = clean_fill_method(method)
    if ndim == 1:
        return _fill_methods[method]
    return {"pad": _pad_2d, "backfill": _backfill_2d}[method]


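# Illustrative usage of get_fill_func (not part of the original source; a
# minimal sketch). The returned function fills in-place and also returns the
# (values, mask) pair:
#
# >>> import numpy as np
# >>> values = np.array([1.0, np.nan, np.nan, 4.0])
# >>> fill_func = get_fill_func("ffill")  # resolves to _pad_1d
# >>> filled, mask = fill_func(values, limit=1)
# >>> filled
# array([ 1.,  1., nan,  4.])

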

def clean_reindex_fill_method(method) -> str | None:
    return clean_fill_method(method, allow_nearest=True)


def _interp_limit(invalid: npt.NDArray[np.bool_], fw_limit, bw_limit):
    """
    Get indexers of values that won't be filled
    because they exceed the limits.

    Parameters
    ----------
    invalid : np.ndarray[bool]
    fw_limit : int or None
        forward limit to index
    bw_limit : int or None
        backward limit to index

    Returns
    -------
    set of indexers

    Notes
    -----
    This is equivalent to the more readable, but slower

    .. code-block:: python

        def _interp_limit(invalid, fw_limit, bw_limit):
            for x in np.where(invalid)[0]:
                if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                    yield x
    """
    # handle forward first; the backward direction is the same except
    # 1. operate on the reversed array
    # 2. subtract the returned indices from N - 1
    N = len(invalid)
    f_idx = set()
    b_idx = set()

    def inner(invalid, limit):
        limit = min(limit, N)
        windowed = _rolling_window(invalid, limit + 1).all(1)
        idx = set(np.where(windowed)[0] + limit) | set(
            np.where((~invalid[: limit + 1]).cumsum() == 0)[0]
        )
        return idx

    if fw_limit is not None:
        if fw_limit == 0:
            f_idx = set(np.where(invalid)[0])
        else:
            f_idx = inner(invalid, fw_limit)

    if bw_limit is not None:
        if bw_limit == 0:
            # then we don't even need to care about backwards
            # just use forwards
            return f_idx
        else:
            b_idx_inv = list(inner(invalid[::-1], bw_limit))
            b_idx = set(N - 1 - np.asarray(b_idx_inv))
            if fw_limit == 0:
                return b_idx

    return f_idx & b_idx


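# Illustrative behaviour of _interp_limit (not part of the original source; a
# minimal sketch). With a forward limit of 1, only the first NaN after a valid
# value may be filled; the rest of the NaN run is reported as "not fillable":
#
# >>> import numpy as np
# >>> invalid = np.array([False, True, True, True, False])
# >>> sorted(_interp_limit(invalid, 1, 0))
# [2, 3]

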

def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.bool_]:
    """
    [True, True, False, True, False], 2 ->

    [
        [True, True],
        [True, False],
        [False, True],
        [True, False],
    ]
    """
    # https://stackoverflow.com/a/6811241
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)