Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/missing.py: 63%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

364 statements  

1""" 

2Routines for filling missing data. 

3""" 

4from __future__ import annotations 

5 

6from functools import wraps 

7from typing import ( 

8 TYPE_CHECKING, 

9 Any, 

10 Literal, 

11 cast, 

12 overload, 

13) 

14 

15import numpy as np 

16 

17from pandas._libs import ( 

18 NaT, 

19 algos, 

20 lib, 

21) 

22from pandas._typing import ( 

23 ArrayLike, 

24 AxisInt, 

25 F, 

26 ReindexMethod, 

27 npt, 

28) 

29from pandas.compat._optional import import_optional_dependency 

30 

31from pandas.core.dtypes.cast import infer_dtype_from 

32from pandas.core.dtypes.common import ( 

33 is_array_like, 

34 is_bool_dtype, 

35 is_numeric_dtype, 

36 is_numeric_v_string_like, 

37 is_object_dtype, 

38 needs_i8_conversion, 

39) 

40from pandas.core.dtypes.dtypes import DatetimeTZDtype 

41from pandas.core.dtypes.missing import ( 

42 is_valid_na_for_dtype, 

43 isna, 

44 na_value_for_dtype, 

45) 

46 

47if TYPE_CHECKING: 

48 from pandas import Index 

49 

50 

def check_value_size(value, mask: npt.NDArray[np.bool_], length: int):
    """
    Validate the size of the values passed to ExtensionArray.fillna.

    Scalars pass through unchanged; array-likes must match ``length`` and are
    reduced to the positions selected by ``mask``.
    """
    # Scalars need no size validation.
    if not is_array_like(value):
        return value

    if len(value) != length:
        raise ValueError(
            f"Length of 'value' does not match. Got ({len(value)}) "
            f" expected {length}"
        )
    # Keep only the fill values that line up with masked (NA) positions.
    return value[mask]

64 

65 

def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True

    Parameters
    ----------
    arr : ArrayLike
        Array whose entries are compared against ``values_to_mask``.
    values_to_mask: list, tuple, or scalar
        Value(s) whose occurrences in ``arr`` should be flagged True.

    Returns
    -------
    np.ndarray[bool]
        True where ``arr`` equals one of ``values_to_mask``; NA positions in
        ``arr`` are flagged too when NA is among the values to mask.
    """
    # When called from Block.replace/replace_list, values_to_mask is a scalar
    # known to be holdable by arr.
    # When called from Series._single_replace, values_to_mask is tuple or list
    dtype, values_to_mask = infer_dtype_from(values_to_mask)

    # Coerce values_to_mask into an array of the inferred dtype so the
    # elementwise comparisons below are well-defined.
    if isinstance(dtype, np.dtype):
        values_to_mask = np.array(values_to_mask, dtype=dtype)
    else:
        # ExtensionDtype: build the matching ExtensionArray instead.
        cls = dtype.construct_array_type()
        if not lib.is_list_like(values_to_mask):
            values_to_mask = [values_to_mask]
        values_to_mask = cls._from_sequence(values_to_mask, dtype=dtype, copy=False)

    potential_na = False
    if is_object_dtype(arr.dtype):
        # pre-compute mask to avoid comparison to NA
        potential_na = True
        arr_mask = ~isna(arr)

    # Separate NA values from comparable values; NAs are handled at the end.
    na_mask = isna(values_to_mask)
    nonna = values_to_mask[~na_mask]

    # GH 21977
    mask = np.zeros(arr.shape, dtype=bool)
    if (
        is_numeric_dtype(arr.dtype)
        and not is_bool_dtype(arr.dtype)
        and is_bool_dtype(nonna.dtype)
    ):
        # Deliberately produce no matches: numeric arr vs bool values
        # should not be treated as equal (GH 21977).
        pass
    elif (
        is_bool_dtype(arr.dtype)
        and is_numeric_dtype(nonna.dtype)
        and not is_bool_dtype(nonna.dtype)
    ):
        # Symmetric case: bool arr vs non-bool numeric values.
        pass
    else:
        for x in nonna:
            if is_numeric_v_string_like(arr, x):
                # GH#29553 prevent numpy deprecation warnings
                pass
            else:
                if potential_na:
                    # Only compare at non-NA positions so we never compare
                    # against NA directly.
                    new_mask = np.zeros(arr.shape, dtype=np.bool_)
                    new_mask[arr_mask] = arr[arr_mask] == x
                else:
                    new_mask = arr == x

                if not isinstance(new_mask, np.ndarray):
                    # usually BooleanArray
                    new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
                mask |= new_mask

    if na_mask.any():
        # NA was among values_to_mask: flag arr's NA positions as well.
        mask |= isna(arr)

    return mask

137 

138 

@overload
def clean_fill_method(
    method: Literal["ffill", "pad", "bfill", "backfill"],
    *,
    allow_nearest: Literal[False] = ...,
) -> Literal["pad", "backfill"]:
    ...


@overload
def clean_fill_method(
    method: Literal["ffill", "pad", "bfill", "backfill", "nearest"],
    *,
    allow_nearest: Literal[True],
) -> Literal["pad", "backfill", "nearest"]:
    ...


def clean_fill_method(
    method: Literal["ffill", "pad", "bfill", "backfill", "nearest"],
    *,
    allow_nearest: bool = False,
) -> Literal["pad", "backfill", "nearest"]:
    """
    Normalize a fill-method alias to its canonical name.

    'ffill' maps to 'pad' and 'bfill' to 'backfill' (case-insensitively);
    'nearest' is accepted only when ``allow_nearest`` is True. Raises
    ValueError for anything else.
    """
    if isinstance(method, str):
        # error: Incompatible types in assignment (expression has type "str", variable
        # has type "Literal['ffill', 'pad', 'bfill', 'backfill', 'nearest']")
        method = method.lower()  # type: ignore[assignment]
        # Collapse the common aliases onto their canonical names.
        if method == "ffill":
            method = "pad"
        elif method == "bfill":
            method = "backfill"

    valid_methods = ["pad", "backfill"]
    expecting = "pad (ffill) or backfill (bfill)"
    if allow_nearest:
        valid_methods = valid_methods + ["nearest"]
        expecting = "pad (ffill), backfill (bfill) or nearest"

    if method not in valid_methods:
        raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}")
    return method

179 

180 

# interpolation methods that dispatch to np.interp
NP_METHODS = ["linear", "time", "index", "values"]

# interpolation methods that dispatch to _interpolate_scipy_wrapper
SP_METHODS = [
    "nearest",
    "zero",
    "slinear",
    "quadratic",
    "cubic",
    "barycentric",
    "krogh",
    "spline",
    "polynomial",
    "from_derivatives",
    "piecewise_polynomial",
    "pchip",
    "akima",
    "cubicspline",
]

203 

204 

def clean_interp_method(method: str, index: Index, **kwargs) -> str:
    """
    Validate an interpolation method name against NP_METHODS/SP_METHODS.

    Also enforces method-specific preconditions: spline/polynomial need an
    ``order`` kwarg, and a few scipy methods require a monotonically
    increasing index. Returns the validated method name unchanged.
    """
    order = kwargs.get("order")

    # spline/polynomial are meaningless without an explicit order.
    if method in ("spline", "polynomial") and order is None:
        raise ValueError("You must specify the order of the spline or polynomial.")

    valid = NP_METHODS + SP_METHODS
    if method not in valid:
        raise ValueError(f"method must be one of {valid}. Got '{method}' instead.")

    needs_monotonic = method in ("krogh", "piecewise_polynomial", "pchip")
    if needs_monotonic and not index.is_monotonic_increasing:
        raise ValueError(
            f"{method} interpolation requires that the index be monotonic."
        )

    return method

222 

223 

def find_valid_index(how: str, is_valid: npt.NDArray[np.bool_]) -> int | None:
    """
    Retrieves the positional index of the first valid value.

    Parameters
    ----------
    how : {'first', 'last'}
        Use this parameter to change between the first or last valid index.
    is_valid: np.ndarray
        Mask to find na_values.

    Returns
    -------
    int or None
        Position of the first/last True entry, or None when the mask is
        empty or contains no True entry.
    """
    assert how in ["first", "last"]

    # Empty mask: nothing can be valid.
    if len(is_valid) == 0:
        return None

    # Collapse a 2-D mask: a row counts as valid if any column is valid.
    if is_valid.ndim == 2:
        is_valid = is_valid.any(axis=1)

    if how == "first":
        idxpos = is_valid.argmax()
    else:
        # Position of the last True: scan the reversed mask.
        idxpos = len(is_valid) - 1 - is_valid[::-1].argmax()

    # argmax on an all-False array returns 0; detect that case explicitly.
    if not is_valid[idxpos]:
        return None
    # Incompatible return value type (got "signedinteger[Any]",
    # expected "Optional[int]")
    return idxpos  # type: ignore[return-value]

260 

261 

def validate_limit_direction(
    limit_direction: str,
) -> Literal["forward", "backward", "both"]:
    """
    Lower-case and validate a limit_direction string.

    Raises ValueError unless the (case-insensitive) value is one of
    'forward', 'backward' or 'both'.
    """
    valid_limit_directions = ["forward", "backward", "both"]
    lowered = limit_direction.lower()
    if lowered not in valid_limit_directions:
        raise ValueError(
            "Invalid limit_direction: expecting one of "
            f"{valid_limit_directions}, got '{lowered}'."
        )
    # error: Incompatible return value type (got "str", expected
    # "Literal['forward', 'backward', 'both']")
    return lowered  # type: ignore[return-value]

275 

276 

def validate_limit_area(limit_area: str | None) -> Literal["inside", "outside"] | None:
    """
    Lower-case and validate a limit_area value.

    None passes through unchanged; otherwise the (case-insensitive) value
    must be 'inside' or 'outside', else ValueError is raised.
    """
    if limit_area is None:
        return None

    valid_limit_areas = ["inside", "outside"]
    lowered = limit_area.lower()
    if lowered not in valid_limit_areas:
        raise ValueError(
            f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
            f"{lowered}."
        )
    # error: Incompatible return value type (got "Optional[str]", expected
    # "Optional[Literal['inside', 'outside']]")
    return lowered  # type: ignore[return-value]

289 

290 

def infer_limit_direction(
    limit_direction: Literal["backward", "forward", "both"] | None, method: str
) -> Literal["backward", "forward", "both"]:
    """
    Resolve limit_direction from the fill method when not given explicitly.

    When ``limit_direction`` is None it defaults to 'backward' for
    backfill-style methods and 'forward' otherwise. When given, it must be
    consistent with the method, else ValueError is raised.
    """
    if limit_direction is None:
        # Derive the default direction from the fill method.
        return "backward" if method in ("backfill", "bfill") else "forward"

    # An explicit direction must agree with a directional fill method.
    if method in ("pad", "ffill") and limit_direction != "forward":
        raise ValueError(
            f"`limit_direction` must be 'forward' for method `{method}`"
        )
    if method in ("backfill", "bfill") and limit_direction != "backward":
        raise ValueError(
            f"`limit_direction` must be 'backward' for method `{method}`"
        )
    return limit_direction

310 

311 

def get_interp_index(method, index: Index) -> Index:
    """
    Return the index to interpolate against for the given method.

    For ``method='linear'`` the original index is ignored and replaced by a
    positional 0..n-1 Index (the prior default behavior). For other methods
    the index itself is validated and returned unchanged.

    Raises
    ------
    ValueError
        If a non-linear method outside {'index', 'values', 'nearest', 'time'}
        is used with a non-numeric, non-datetime index.
    NotImplementedError
        If the index contains NaNs.
    """
    # create/use the index
    if method == "linear":
        # prior default
        from pandas import Index

        index = Index(np.arange(len(index)))
    else:
        methods = {"index", "values", "nearest", "time"}
        is_numeric_or_datetime = (
            is_numeric_dtype(index.dtype)
            or isinstance(index.dtype, DatetimeTZDtype)
            or lib.is_np_dtype(index.dtype, "mM")
        )
        # Methods outside `methods` interpolate against the index values,
        # so those values must be orderable numbers or datetimes.
        if method not in methods and not is_numeric_or_datetime:
            raise ValueError(
                "Index column must be numeric or datetime type when "
                f"using {method} method other than linear. "
                "Try setting a numeric or datetime index column before "
                "interpolating."
            )

    if isna(index).any():
        raise NotImplementedError(
            "Interpolation with NaNs in the index "
            "has not been implemented. Try filling "
            "those NaNs before interpolating."
        )
    return index

341 

342 

def interpolate_2d_inplace(
    data: np.ndarray,  # floating dtype
    index: Index,
    axis: AxisInt,
    method: str = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    mask=None,
    **kwargs,
) -> None:
    """
    Column-wise application of _interpolate_1d.

    Validates the method/limit arguments once, converts the index to numeric
    interpolation coordinates, then applies _interpolate_1d to each 1-d slice
    of ``data`` along ``axis``.

    Notes
    -----
    Alters 'data' in-place.

    The signature does differ from _interpolate_1d because it only
    includes what is needed for Block.interpolate.
    """
    # validate the interp method
    clean_interp_method(method, index, **kwargs)

    # Normalize an NA-like fill_value to the NA value matching data's dtype.
    if is_valid_na_for_dtype(fill_value, data.dtype):
        fill_value = na_value_for_dtype(data.dtype, compat=False)

    if method == "time":
        if not needs_i8_conversion(index.dtype):
            raise ValueError(
                "time-weighted interpolation only works "
                "on Series or DataFrames with a "
                "DatetimeIndex"
            )
        # 'time' is just 'values' once the index is known to be datetime-like.
        method = "values"

    limit_direction = validate_limit_direction(limit_direction)
    limit_area_validated = validate_limit_area(limit_area)

    # default limit is unlimited GH #16282
    limit = algos.validate_limit(nobs=None, limit=limit)

    indices = _index_to_interp_indices(index, method)

    def func(yvalues: np.ndarray) -> None:
        # process 1-d slices in the axis direction

        _interpolate_1d(
            indices=indices,
            yvalues=yvalues,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area_validated,
            fill_value=fill_value,
            bounds_error=False,
            mask=mask,
            **kwargs,
        )

    # error: Argument 1 to "apply_along_axis" has incompatible type
    # "Callable[[ndarray[Any, Any]], None]"; expected "Callable[...,
    # Union[_SupportsArray[dtype[<nothing>]], Sequence[_SupportsArray
    # [dtype[<nothing>]]], Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]],
    # Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]],
    # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]]]]]"
    np.apply_along_axis(func, axis, data)  # type: ignore[arg-type]

411 

412 

def _index_to_interp_indices(index: Index, method: str) -> np.ndarray:
    """
    Convert Index to ndarray of indices to pass to NumPy/SciPy.

    Datetime-like indexes are viewed as int64 nanoseconds so they can serve
    as numeric x-coordinates for interpolation.
    """
    xarr = index._values
    if needs_i8_conversion(xarr.dtype):
        # GH#1646 for dt64tz
        xarr = xarr.view("i8")

    if method == "linear":
        inds = xarr
        inds = cast(np.ndarray, inds)
    else:
        inds = np.asarray(xarr)

        if method in ("values", "index"):
            # Object-dtype coordinates: try to coerce to a numeric ndarray.
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)

    return inds

433 

434 

def _interpolate_1d(
    indices: np.ndarray,
    yvalues: np.ndarray,
    method: str = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: Literal["inside", "outside"] | None = None,
    fill_value: Any | None = None,
    bounds_error: bool = False,
    order: int | None = None,
    mask=None,
    **kwargs,
) -> None:
    """
    Logic for the 1-d interpolation. The input
    indices and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Notes
    -----
    Fills 'yvalues' in-place.
    """
    # A caller-supplied mask overrides NA detection on yvalues.
    if mask is not None:
        invalid = mask
    else:
        invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # No valid points to interpolate from: nothing we can do.
        return

    if valid.all():
        # Nothing missing: nothing to fill.
        return

    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))

    first_valid_index = find_valid_index(how="first", is_valid=valid)
    if first_valid_index is None:  # no nan found in start
        first_valid_index = 0
    # Leading NaNs (before the first valid value).
    start_nans = set(range(first_valid_index))

    last_valid_index = find_valid_index(how="last", is_valid=valid)
    if last_valid_index is None:  # no nan found in end
        last_valid_index = len(yvalues)
    # Trailing NaNs (after the last valid value).
    end_nans = set(range(1 + last_valid_index, len(valid)))

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    preserve_nans: list | set
    if limit_direction == "forward":
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == "backward":
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == "inside":
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == "outside":
        # preserve NaNs on the inside
        mid_nans = all_nans - start_nans - end_nans
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list
    preserve_nans = sorted(preserve_nans)

    is_datetimelike = yvalues.dtype.kind in "mM"

    if is_datetimelike:
        # Interpolate on the int64 view; NaT positions are restored below.
        yvalues = yvalues.view("i8")

    if method in NP_METHODS:
        # np.interp requires sorted X values, #21037

        indexer = np.argsort(indices[valid])
        yvalues[invalid] = np.interp(
            indices[invalid], indices[valid][indexer], yvalues[valid][indexer]
        )
    else:
        yvalues[invalid] = _interpolate_scipy_wrapper(
            indices[valid],
            yvalues[valid],
            indices[invalid],
            method=method,
            fill_value=fill_value,
            bounds_error=bounds_error,
            order=order,
            **kwargs,
        )

    if mask is not None:
        # Caller-provided mask: record the preserved-NA positions in the
        # mask rather than writing NA values into yvalues.
        mask[:] = False
        mask[preserve_nans] = True
    elif is_datetimelike:
        yvalues[preserve_nans] = NaT.value
    else:
        yvalues[preserve_nans] = np.nan
    return

547 

548 

def _interpolate_scipy_wrapper(
    x: np.ndarray,
    y: np.ndarray,
    new_x: np.ndarray,
    method: str,
    fill_value=None,
    bounds_error: bool = False,
    order=None,
    **kwargs,
):
    """
    Passed off to scipy.interpolate.interp1d. method is scipy's kind.
    Returns an array interpolated at new_x. Add any new methods to
    the list in _clean_interp_method.

    Parameters
    ----------
    x, y : np.ndarray
        Known coordinates/values to fit against.
    new_x : np.ndarray
        Points at which to evaluate the fitted interpolator.
    method : str
        One of the SP_METHODS names; selects the scipy routine used.
    fill_value, bounds_error, order
        Passed through to the underlying scipy routine where applicable.
    """
    extra = f"{method} interpolation requires SciPy."
    import_optional_dependency("scipy", extra=extra)
    from scipy import interpolate

    new_x = np.asarray(new_x)

    # ignores some kwargs that could be passed along.
    alt_methods = {
        "barycentric": interpolate.barycentric_interpolate,
        "krogh": interpolate.krogh_interpolate,
        "from_derivatives": _from_derivatives,
        "piecewise_polynomial": _from_derivatives,
        "cubicspline": _cubicspline_interpolate,
        "akima": _akima_interpolate,
        "pchip": interpolate.pchip_interpolate,
    }

    interp1d_methods = [
        "nearest",
        "zero",
        "slinear",
        "quadratic",
        "cubic",
        "polynomial",
    ]
    if method in interp1d_methods:
        # interp1d's `kind` is either a method name or, for "polynomial",
        # the polynomial order.
        if method == "polynomial":
            kind = order
        else:
            kind = method
        terp = interpolate.interp1d(
            x, y, kind=kind, fill_value=fill_value, bounds_error=bounds_error
        )
        new_y = terp(new_x)
    elif method == "spline":
        # GH #10633, #24014
        if isna(order) or (order <= 0):
            raise ValueError(
                f"order needs to be specified and greater than 0; got order: {order}"
            )
        terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs)
        new_y = terp(new_x)
    else:
        # GH 7295: need to be able to write for some reason
        # in some circumstances: check all three
        if not x.flags.writeable:
            x = x.copy()
        if not y.flags.writeable:
            y = y.copy()
        if not new_x.flags.writeable:
            new_x = new_x.copy()
        terp = alt_methods[method]
        new_y = terp(x, y, new_x, **kwargs)
    return new_y

618 

619 

620def _from_derivatives( 

621 xi: np.ndarray, 

622 yi: np.ndarray, 

623 x: np.ndarray, 

624 order=None, 

625 der: int | list[int] | None = 0, 

626 extrapolate: bool = False, 

627): 

628 """ 

629 Convenience function for interpolate.BPoly.from_derivatives. 

630 

631 Construct a piecewise polynomial in the Bernstein basis, compatible 

632 with the specified values and derivatives at breakpoints. 

633 

634 Parameters 

635 ---------- 

636 xi : array-like 

637 sorted 1D array of x-coordinates 

638 yi : array-like or list of array-likes 

639 yi[i][j] is the j-th derivative known at xi[i] 

640 order: None or int or array-like of ints. Default: None. 

641 Specifies the degree of local polynomials. If not None, some 

642 derivatives are ignored. 

643 der : int or list 

644 How many derivatives to extract; None for all potentially nonzero 

645 derivatives (that is a number equal to the number of points), or a 

646 list of derivatives to extract. This number includes the function 

647 value as 0th derivative. 

648 extrapolate : bool, optional 

649 Whether to extrapolate to ouf-of-bounds points based on first and last 

650 intervals, or to return NaNs. Default: True. 

651 

652 See Also 

653 -------- 

654 scipy.interpolate.BPoly.from_derivatives 

655 

656 Returns 

657 ------- 

658 y : scalar or array-like 

659 The result, of length R or length M or M by R. 

660 """ 

661 from scipy import interpolate 

662 

663 # return the method for compat with scipy version & backwards compat 

664 method = interpolate.BPoly.from_derivatives 

665 m = method(xi, yi.reshape(-1, 1), orders=order, extrapolate=extrapolate) 

666 

667 return m(x) 

668 

669 

670def _akima_interpolate( 

671 xi: np.ndarray, 

672 yi: np.ndarray, 

673 x: np.ndarray, 

674 der: int | list[int] | None = 0, 

675 axis: AxisInt = 0, 

676): 

677 """ 

678 Convenience function for akima interpolation. 

679 xi and yi are arrays of values used to approximate some function f, 

680 with ``yi = f(xi)``. 

681 

682 See `Akima1DInterpolator` for details. 

683 

684 Parameters 

685 ---------- 

686 xi : np.ndarray 

687 A sorted list of x-coordinates, of length N. 

688 yi : np.ndarray 

689 A 1-D array of real values. `yi`'s length along the interpolation 

690 axis must be equal to the length of `xi`. If N-D array, use axis 

691 parameter to select correct axis. 

692 x : np.ndarray 

693 Of length M. 

694 der : int, optional 

695 How many derivatives to extract; None for all potentially 

696 nonzero derivatives (that is a number equal to the number 

697 of points), or a list of derivatives to extract. This number 

698 includes the function value as 0th derivative. 

699 axis : int, optional 

700 Axis in the yi array corresponding to the x-coordinate values. 

701 

702 See Also 

703 -------- 

704 scipy.interpolate.Akima1DInterpolator 

705 

706 Returns 

707 ------- 

708 y : scalar or array-like 

709 The result, of length R or length M or M by R, 

710 

711 """ 

712 from scipy import interpolate 

713 

714 P = interpolate.Akima1DInterpolator(xi, yi, axis=axis) 

715 

716 return P(x, nu=der) 

717 

718 

719def _cubicspline_interpolate( 

720 xi: np.ndarray, 

721 yi: np.ndarray, 

722 x: np.ndarray, 

723 axis: AxisInt = 0, 

724 bc_type: str | tuple[Any, Any] = "not-a-knot", 

725 extrapolate=None, 

726): 

727 """ 

728 Convenience function for cubic spline data interpolator. 

729 

730 See `scipy.interpolate.CubicSpline` for details. 

731 

732 Parameters 

733 ---------- 

734 xi : np.ndarray, shape (n,) 

735 1-d array containing values of the independent variable. 

736 Values must be real, finite and in strictly increasing order. 

737 yi : np.ndarray 

738 Array containing values of the dependent variable. It can have 

739 arbitrary number of dimensions, but the length along ``axis`` 

740 (see below) must match the length of ``x``. Values must be finite. 

741 x : np.ndarray, shape (m,) 

742 axis : int, optional 

743 Axis along which `y` is assumed to be varying. Meaning that for 

744 ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``. 

745 Default is 0. 

746 bc_type : string or 2-tuple, optional 

747 Boundary condition type. Two additional equations, given by the 

748 boundary conditions, are required to determine all coefficients of 

749 polynomials on each segment [2]_. 

750 If `bc_type` is a string, then the specified condition will be applied 

751 at both ends of a spline. Available conditions are: 

752 * 'not-a-knot' (default): The first and second segment at a curve end 

753 are the same polynomial. It is a good default when there is no 

754 information on boundary conditions. 

755 * 'periodic': The interpolated functions is assumed to be periodic 

756 of period ``x[-1] - x[0]``. The first and last value of `y` must be 

757 identical: ``y[0] == y[-1]``. This boundary condition will result in 

758 ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``. 

759 * 'clamped': The first derivative at curves ends are zero. Assuming 

760 a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition. 

761 * 'natural': The second derivative at curve ends are zero. Assuming 

762 a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition. 

763 If `bc_type` is a 2-tuple, the first and the second value will be 

764 applied at the curve start and end respectively. The tuple values can 

765 be one of the previously mentioned strings (except 'periodic') or a 

766 tuple `(order, deriv_values)` allowing to specify arbitrary 

767 derivatives at curve ends: 

768 * `order`: the derivative order, 1 or 2. 

769 * `deriv_value`: array-like containing derivative values, shape must 

770 be the same as `y`, excluding ``axis`` dimension. For example, if 

771 `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with 

772 the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D 

773 and have the shape (n0, n1). 

774 extrapolate : {bool, 'periodic', None}, optional 

775 If bool, determines whether to extrapolate to out-of-bounds points 

776 based on first and last intervals, or to return NaNs. If 'periodic', 

777 periodic extrapolation is used. If None (default), ``extrapolate`` is 

778 set to 'periodic' for ``bc_type='periodic'`` and to True otherwise. 

779 

780 See Also 

781 -------- 

782 scipy.interpolate.CubicHermiteSpline 

783 

784 Returns 

785 ------- 

786 y : scalar or array-like 

787 The result, of shape (m,) 

788 

789 References 

790 ---------- 

791 .. [1] `Cubic Spline Interpolation 

792 <https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_ 

793 on Wikiversity. 

794 .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978. 

795 """ 

796 from scipy import interpolate 

797 

798 P = interpolate.CubicSpline( 

799 xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate 

800 ) 

801 

802 return P(x) 

803 

804 

def _interpolate_with_limit_area(
    values: np.ndarray,
    method: Literal["pad", "backfill"],
    limit: int | None,
    limit_area: Literal["inside", "outside"],
) -> None:
    """
    Apply interpolation and limit_area logic to values along a to-be-specified axis.

    Fills with pad/backfill first, then re-sets to NaN any originally-NA
    positions that fall outside the region permitted by ``limit_area``.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str
        Interpolation method. Could be "bfill" or "pad"
    limit: int, optional
        Index limit on interpolation.
    limit_area: {'inside', 'outside'}
        Limit area for interpolation.

    Notes
    -----
    Modifies values in-place.
    """

    invalid = isna(values)
    is_valid = ~invalid

    # All-NA input: nothing can be filled, leave values untouched.
    if not invalid.all():
        first = find_valid_index(how="first", is_valid=is_valid)
        if first is None:
            first = 0
        last = find_valid_index(how="last", is_valid=is_valid)
        if last is None:
            last = len(values)

        # Fill everywhere first; out-of-area fills are undone below.
        pad_or_backfill_inplace(
            values,
            method=method,
            limit=limit,
            limit_area=limit_area,
        )

        # `invalid` now marks only the positions that must revert to NaN.
        if limit_area == "inside":
            # Filling was allowed between first and last valid values.
            invalid[first : last + 1] = False
        elif limit_area == "outside":
            # Filling was allowed only before/after the valid span.
            invalid[:first] = invalid[last + 1 :] = False
        else:
            raise ValueError("limit_area should be 'inside' or 'outside'")

        values[invalid] = np.nan

856 

857 

def pad_or_backfill_inplace(
    values: np.ndarray,
    method: Literal["pad", "backfill"] = "pad",
    axis: AxisInt = 0,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
) -> None:
    """
    Perform an actual interpolation of values, values will be make 2-d if
    needed fills inplace, returns the result.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str, default "pad"
        Interpolation method. Could be "bfill" or "pad"
    axis: 0 or 1
        Interpolation axis
    limit: int, optional
        Index limit on interpolation.
    limit_area: str, optional
        Limit area for interpolation. Can be "inside" or "outside"

    Notes
    -----
    Modifies values in-place.
    """
    # Fill always runs along axis 0 of the (possibly transposed) 2-d view.
    transf = (lambda x: x) if axis == 0 else (lambda x: x.T)

    # reshape a 1 dim if needed
    if values.ndim == 1:
        if axis != 0:  # pragma: no cover
            raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
        # 1-d input becomes a single-row 2-d view sharing the same buffer,
        # so in-place fills still hit the caller's array.
        values = values.reshape(tuple((1,) + values.shape))

    method = clean_fill_method(method)
    tvalues = transf(values)

    func = get_fill_func(method, ndim=2)
    # _pad_2d and _backfill_2d both modify tvalues inplace
    func(tvalues, limit=limit, limit_area=limit_area)

900 

901 

902def _fillna_prep( 

903 values, mask: npt.NDArray[np.bool_] | None = None 

904) -> npt.NDArray[np.bool_]: 

905 # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d 

906 

907 if mask is None: 

908 mask = isna(values) 

909 

910 return mask 

911 

912 

def _datetimelike_compat(func: F) -> F:
    """
    Wrapper to handle datetime64 and timedelta64 dtypes.

    Decorated fill functions receive an int64 view of datetime-like input,
    and the result is viewed back to the original dtype on the way out.
    """

    @wraps(func)
    def new_func(
        values,
        limit: int | None = None,
        limit_area: Literal["inside", "outside"] | None = None,
        mask=None,
    ):
        if needs_i8_conversion(values.dtype):
            if mask is None:
                # This needs to occur before casting to int64
                mask = isna(values)

            result, mask = func(
                values.view("i8"), limit=limit, limit_area=limit_area, mask=mask
            )
            # Restore the original datetime-like dtype on the filled values.
            return result.view(values.dtype), mask

        return func(values, limit=limit, limit_area=limit_area, mask=mask)

    return cast(F, new_func)

938 

939 

@_datetimelike_compat
def _pad_1d(
    values: np.ndarray,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    """
    Forward-fill NA positions of a 1-D array in place.

    Returns the (modified) values together with the mask that was used.
    """
    mask = _fillna_prep(values, mask)
    # limit_area pruning needs at least one valid value to anchor on.
    if limit_area is not None and not mask.all():
        _fill_limit_area_1d(mask, limit_area)
    algos.pad_inplace(values, mask, limit=limit)
    return values, mask

952 

953 

@_datetimelike_compat
def _backfill_1d(
    values: np.ndarray,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    """
    Backward-fill NA positions of a 1-D array in place.

    Returns the (modified) values together with the mask that was used.
    """
    mask = _fillna_prep(values, mask)
    # limit_area pruning needs at least one valid value to anchor on.
    if limit_area is not None and not mask.all():
        _fill_limit_area_1d(mask, limit_area)
    algos.backfill_inplace(values, mask, limit=limit)
    return values, mask

966 

967 

@_datetimelike_compat
def _pad_2d(
    values: np.ndarray,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
):
    """
    Forward-fill NA positions of a 2-D array in place, row-wise.

    Returns the (modified) values together with the mask that was used.
    """
    mask = _fillna_prep(values, mask)
    if limit_area is not None:
        _fill_limit_area_2d(mask, limit_area)

    if values.size:
        algos.pad_2d_inplace(values, mask, limit=limit)
    else:
        # for test coverage
        pass
    return values, mask

985 

986 

@_datetimelike_compat
def _backfill_2d(
    values,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
):
    """
    Backward-fill NA positions of a 2-D array in place, row-wise.

    Returns the (modified) values together with the mask that was used.
    """
    mask = _fillna_prep(values, mask)
    if limit_area is not None:
        _fill_limit_area_2d(mask, limit_area)

    if values.size:
        algos.backfill_2d_inplace(values, mask, limit=limit)
    else:
        # for test coverage
        pass
    return values, mask

1004 

1005 

1006def _fill_limit_area_1d( 

1007 mask: npt.NDArray[np.bool_], limit_area: Literal["outside", "inside"] 

1008) -> None: 

1009 """Prepare 1d mask for ffill/bfill with limit_area. 

1010 

1011 Caller is responsible for checking at least one value of mask is False. 

1012 When called, mask will no longer faithfully represent when 

1013 the corresponding are NA or not. 

1014 

1015 Parameters 

1016 ---------- 

1017 mask : np.ndarray[bool, ndim=1] 

1018 Mask representing NA values when filling. 

1019 limit_area : { "outside", "inside" } 

1020 Whether to limit filling to outside or inside the outer most non-NA value. 

1021 """ 

1022 neg_mask = ~mask 

1023 first = neg_mask.argmax() 

1024 last = len(neg_mask) - neg_mask[::-1].argmax() - 1 

1025 if limit_area == "inside": 

1026 mask[:first] = False 

1027 mask[last + 1 :] = False 

1028 elif limit_area == "outside": 

1029 mask[first + 1 : last] = False 

1030 

1031 

1032def _fill_limit_area_2d( 

1033 mask: npt.NDArray[np.bool_], limit_area: Literal["outside", "inside"] 

1034) -> None: 

1035 """Prepare 2d mask for ffill/bfill with limit_area. 

1036 

1037 When called, mask will no longer faithfully represent when 

1038 the corresponding are NA or not. 

1039 

1040 Parameters 

1041 ---------- 

1042 mask : np.ndarray[bool, ndim=1] 

1043 Mask representing NA values when filling. 

1044 limit_area : { "outside", "inside" } 

1045 Whether to limit filling to outside or inside the outer most non-NA value. 

1046 """ 

1047 neg_mask = ~mask.T 

1048 if limit_area == "outside": 

1049 # Identify inside 

1050 la_mask = ( 

1051 np.maximum.accumulate(neg_mask, axis=0) 

1052 & np.maximum.accumulate(neg_mask[::-1], axis=0)[::-1] 

1053 ) 

1054 else: 

1055 # Identify outside 

1056 la_mask = ( 

1057 ~np.maximum.accumulate(neg_mask, axis=0) 

1058 | ~np.maximum.accumulate(neg_mask[::-1], axis=0)[::-1] 

1059 ) 

1060 mask[la_mask.T] = False 

1061 

1062 

# Dispatch table mapping 1D fill-method names to their implementations;
# consulted by get_fill_func when ndim == 1 (the 2D variants are selected
# inline there).
_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}

1064 

1065 

def get_fill_func(method, ndim: int = 1):
    """
    Return the fill routine for ``method`` ("pad" or "backfill").

    ``method`` is normalized via ``clean_fill_method`` first; ``ndim``
    selects between the 1D and 2D implementations.
    """
    method = clean_fill_method(method)
    if ndim != 1:
        return {"pad": _pad_2d, "backfill": _backfill_2d}[method]
    return _fill_methods[method]

1071 

1072 

def clean_reindex_fill_method(method) -> ReindexMethod | None:
    """Normalize a reindex fill ``method``; ``None`` passes through unchanged."""
    return None if method is None else clean_fill_method(method, allow_nearest=True)

1077 

1078 

def _interp_limit(
    invalid: npt.NDArray[np.bool_], fw_limit: int | None, bw_limit: int | None
):
    """
    Get indexers of values that won't be filled
    because they exceed the limits.

    Parameters
    ----------
    invalid : np.ndarray[bool]
        True for entries that are missing (candidates for filling).
    fw_limit : int or None
        forward limit to index
    bw_limit : int or None
        backward limit to index

    Returns
    -------
    set of indexers

    Notes
    -----
    This is equivalent to the more readable, but slower

    .. code-block:: python

        def _interp_limit(invalid, fw_limit, bw_limit):
            for x in np.where(invalid)[0]:
                if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                    yield x
    """
    # handle forward first; the backward direction is the same except
    # 1. operate on the reversed array
    # 2. subtract the returned indices from N - 1
    N = len(invalid)
    f_idx = set()
    b_idx = set()

    def inner(invalid, limit: int):
        # Indices lying more than `limit` positions into a run of invalid
        # entries, collected from two sources:
        #  * the rolling-window term: a full window of `limit + 1`
        #    consecutive invalid entries ending at the index;
        #  * the cumsum term: indices within the first `limit + 1` positions
        #    that sit in an unbroken *leading* run of invalid entries, which
        #    the window scan cannot flag (not enough history before them).
        limit = min(limit, N)  # clamp so the window never exceeds the array
        windowed = _rolling_window(invalid, limit + 1).all(1)
        idx = set(np.where(windowed)[0] + limit) | set(
            np.where((~invalid[: limit + 1]).cumsum() == 0)[0]
        )
        return idx

    if fw_limit is not None:
        if fw_limit == 0:
            # Forward limit of 0: nothing may be forward-filled, so every
            # invalid index is excluded in the forward direction.
            f_idx = set(np.where(invalid)[0])
        else:
            f_idx = inner(invalid, fw_limit)

    if bw_limit is not None:
        if bw_limit == 0:
            # then we don't even need to care about backwards
            # just use forwards
            return f_idx
        else:
            # Backward pass: run the forward scan on the reversed array and
            # map hits back to original positions via N - 1 - index.
            b_idx_inv = list(inner(invalid[::-1], bw_limit))
            b_idx = set(N - 1 - np.asarray(b_idx_inv))
            if fw_limit == 0:
                # fw_limit == 0 already excludes everything forward, so the
                # combined answer is just the backward exclusions.
                return b_idx

    return f_idx & b_idx

1142 

1143 

1144def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.bool_]: 

1145 """ 

1146 [True, True, False, True, False], 2 -> 

1147 

1148 [ 

1149 [True, True], 

1150 [True, False], 

1151 [False, True], 

1152 [True, False], 

1153 ] 

1154 """ 

1155 # https://stackoverflow.com/a/6811241 

1156 shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) 

1157 strides = a.strides + (a.strides[-1],) 

1158 return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)