Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/missing.py: 15%

1"""

2Routines for filling missing data.

3"""

4from __future__ import annotations

6from functools import (

7 partial,

8 wraps,

10from typing import (

11 TYPE_CHECKING,

12 Any,

13 cast,

14)

16import numpy as np

18from pandas._libs import (

19 NaT,

20 algos,

21 lib,

22)

23from pandas._typing import (

24 ArrayLike,

25 Axis,

26 AxisInt,

27 F,

28 npt,

29)

30from pandas.compat._optional import import_optional_dependency

32from pandas.core.dtypes.cast import infer_dtype_from

33from pandas.core.dtypes.common import (

34 is_array_like,

35 is_numeric_v_string_like,

36 is_object_dtype,

37 needs_i8_conversion,

38)

39from pandas.core.dtypes.missing import (

40 is_valid_na_for_dtype,

41 isna,

42 na_value_for_dtype,

43)

45if TYPE_CHECKING:

46 from pandas import Index

def check_value_size(value, mask: npt.NDArray[np.bool_], length: int):
    """
    Validate the size of the values passed to ExtensionArray.fillna.

    Parameters
    ----------
    value : scalar or array-like
        Candidate fill value.
    mask : np.ndarray[bool]
        Boolean mask used to index ``value`` when it is array-like.
    length : int
        Length an array-like ``value`` is required to have.

    Returns
    -------
    scalar or array-like
        ``value`` itself when it is not array-like, otherwise ``value[mask]``.

    Raises
    ------
    ValueError
        If ``value`` is array-like and ``len(value) != length``.
    """
    if not is_array_like(value):
        # Non-array-like fill values need no size validation.
        return value

    if len(value) != length:
        # The two fragments are joined with exactly one space between them.
        raise ValueError(
            f"Length of 'value' does not match. Got ({len(value)}) "
            f"expected {length}"
        )
    return value[mask]

def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
    """
    Build a boolean mask with the shape of ``arr`` that is True wherever an
    entry equals any member of ``values_to_mask``.

    Parameters
    ----------
    arr : ArrayLike
    values_to_mask : list, tuple, or scalar

    Returns
    -------
    np.ndarray[bool]
    """
    # Callers: Block.replace/replace_list pass a scalar known to be holdable
    # by arr; Series._single_replace passes a tuple or list.
    dtype, values_to_mask = infer_dtype_from(values_to_mask)
    # mypy: "dtype" may be an ExtensionDtype, which np.array does not accept
    # in its signature.
    values_to_mask = np.array(values_to_mask, dtype=dtype)  # type: ignore[arg-type]

    # For object dtype, pre-compute the non-NA positions so the equality
    # comparison below never touches NA.
    potential_na = is_object_dtype(arr)
    if potential_na:
        arr_mask = ~isna(arr)

    na_mask = isna(values_to_mask)
    nonna = values_to_mask[~na_mask]

    # GH 21977
    mask = np.zeros(arr.shape, dtype=bool)
    for candidate in nonna:
        if is_numeric_v_string_like(arr, candidate):
            # GH#29553 prevent numpy deprecation warnings
            continue

        if potential_na:
            new_mask = np.zeros(arr.shape, dtype=np.bool_)
            new_mask[arr_mask] = arr[arr_mask] == candidate
        else:
            new_mask = arr == candidate
            if not isinstance(new_mask, np.ndarray):
                # usually BooleanArray
                new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
        mask |= new_mask

    # NA members of values_to_mask match every NA position of arr.
    if na_mask.any():
        mask |= isna(arr)

    return mask

119

120

def clean_fill_method(method: str | None, allow_nearest: bool = False):
    """
    Normalize a fill-method name.

    Parameters
    ----------
    method : str or None
        ``None`` and "asfreq" map to ``None``; strings are lower-cased and
        the aliases "ffill"/"bfill" become "pad"/"backfill".
    allow_nearest : bool, default False
        Whether "nearest" is also accepted.

    Returns
    -------
    str or None

    Raises
    ------
    ValueError
        If the normalized method is not one of the accepted names.
    """
    # asfreq is compat for resampling
    if method in (None, "asfreq"):
        return None

    if isinstance(method, str):
        method = method.lower()
        method = {"ffill": "pad", "bfill": "backfill"}.get(method, method)

    if allow_nearest:
        valid_methods = ("pad", "backfill", "nearest")
        expecting = "pad (ffill), backfill (bfill) or nearest"
    else:
        valid_methods = ("pad", "backfill")
        expecting = "pad (ffill) or backfill (bfill)"

    if method not in valid_methods:
        raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}")
    return method

141

142

# Interpolation methods that _interpolate_1d evaluates with np.interp.
NP_METHODS = ["linear", "time", "index", "values"]

# Interpolation methods that _interpolate_1d hands to
# _interpolate_scipy_wrapper.
SP_METHODS = [
    "nearest", "zero", "slinear", "quadratic", "cubic",
    "barycentric", "krogh",
    "spline", "polynomial",
    "from_derivatives", "piecewise_polynomial",
    "pchip", "akima", "cubicspline",
]

165

166

def clean_interp_method(method: str, index: Index, **kwargs) -> str:
    """
    Validate an interpolation method name against NP_METHODS and SP_METHODS.

    Parameters
    ----------
    method : str
    index : Index
        Only inspected (``is_monotonic_increasing``) for the methods
        "krogh", "piecewise_polynomial" and "pchip".
    **kwargs
        Only the "order" entry is read.

    Returns
    -------
    str
        ``method``, unchanged.

    Raises
    ------
    ValueError
        If "spline"/"polynomial" is requested without ``order``, if the
        method is unknown, or if a method that needs a monotonic index is
        given a non-monotonic one.
    """
    order_required = method in ("spline", "polynomial")
    if order_required and kwargs.get("order") is None:
        raise ValueError("You must specify the order of the spline or polynomial.")

    valid = NP_METHODS + SP_METHODS
    if method not in valid:
        raise ValueError(f"method must be one of {valid}. Got '{method}' instead.")

    monotonic_required = method in ("krogh", "piecewise_polynomial", "pchip")
    if monotonic_required and not index.is_monotonic_increasing:
        raise ValueError(
            f"{method} interpolation requires that the index be monotonic."
        )

    return method

184

185

def find_valid_index(
    values, *, how: str, is_valid: npt.NDArray[np.bool_]
) -> int | None:
    """
    Return the position of the first or last valid value.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    how : {'first', 'last'}
        Which end of ``values`` to search from.
    is_valid : np.ndarray
        Boolean mask, True where the value is valid. For 2-D ``values`` a
        row counts as valid when any of its entries is valid.

    Returns
    -------
    int or None
        None when ``values`` is empty or no valid entry exists.
    """
    assert how in ["first", "last"]

    n_values = len(values)
    if n_values == 0:  # early stop
        return None

    if values.ndim == 2:
        is_valid = is_valid.any(axis=1)  # reduce axis 1

    if how == "first":
        idxpos = is_valid.argmax()
    elif how == "last":
        idxpos = n_values - 1 - is_valid[::-1].argmax()

    # argmax also returns 0 when nothing is True, so confirm the hit.
    if not is_valid[idxpos]:
        return None
    # mypy: got "signedinteger[Any]", expected "Optional[int]"
    return idxpos  # type: ignore[return-value]

225

226

def interpolate_array_2d(
    data: np.ndarray,
    method: str = "pad",
    axis: AxisInt = 0,
    index: Index | None = None,
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    coerce: bool = False,
    downcast: str | None = None,
    **kwargs,
) -> None:
    """
    Dispatch to interpolate_2d (fill methods) or _interpolate_2d_with_fill
    (every other method).

    ``coerce`` and ``downcast`` are accepted but not used in this function.

    Raises
    ------
    ValueError
        If ``method`` is a fill method and ``fill_value`` is also given.

    Notes
    -----
    Alters 'data' in-place.
    """
    try:
        fill_method = clean_fill_method(method)
    except ValueError:
        # Not a pad/backfill name: treat it as an interpolation method.
        fill_method = None

    if fill_method is None:
        assert index is not None  # for mypy

        _interpolate_2d_with_fill(
            data=data,
            index=index,
            axis=axis,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            **kwargs,
        )
        return

    if fill_value is not None:
        # similar to validate_fillna_kwargs
        raise ValueError("Cannot pass both fill_value and method")

    interpolate_2d(
        data,
        method=fill_method,
        axis=axis,
        limit=limit,
        limit_area=limit_area,
    )

278

279

def _interpolate_2d_with_fill(
    data: np.ndarray,  # floating dtype
    index: Index,
    axis: AxisInt,
    method: str = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    **kwargs,
) -> None:
    """
    Apply _interpolate_1d to every 1-d slice of ``data`` along ``axis``.

    Raises
    ------
    ValueError
        If ``method`` fails clean_interp_method, if ``method`` is "time" and
        the index dtype does not need i8 conversion, or if
        ``limit_direction`` / ``limit_area`` is not an accepted value.

    Notes
    -----
    Alters 'data' in-place.

    The signature differs from _interpolate_1d because it only includes
    what is needed for Block.interpolate.
    """
    # validate the interp method
    clean_interp_method(method, index, **kwargs)

    if is_valid_na_for_dtype(fill_value, data.dtype):
        fill_value = na_value_for_dtype(data.dtype, compat=False)

    if method == "time":
        if not needs_i8_conversion(index.dtype):
            raise ValueError(
                "time-weighted interpolation only works "
                "on Series or DataFrames with a "
                "DatetimeIndex"
            )
        # From here on "time" is handled exactly like "values".
        method = "values"

    limit_direction = limit_direction.lower()
    valid_limit_directions = ["forward", "backward", "both"]
    if limit_direction not in valid_limit_directions:
        raise ValueError(
            "Invalid limit_direction: expecting one of "
            f"{valid_limit_directions}, got '{limit_direction}'."
        )

    if limit_area is not None:
        limit_area = limit_area.lower()
        valid_limit_areas = ["inside", "outside"]
        if limit_area not in valid_limit_areas:
            raise ValueError(
                f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
                f"{limit_area}."
            )

    # default limit is unlimited GH #16282
    limit = algos.validate_limit(nobs=None, limit=limit)

    indices = _index_to_interp_indices(index, method)

    def _fill_slice(yvalues: np.ndarray) -> None:
        # process one 1-d slice in the axis direction
        _interpolate_1d(
            indices=indices,
            yvalues=yvalues,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            bounds_error=False,
            **kwargs,
        )

    # mypy: apply_along_axis expects a callable returning an array-like,
    # but the callable here returns None.
    np.apply_along_axis(_fill_slice, axis, data)  # type: ignore[arg-type]

360

361

362def _index_to_interp_indices(index: Index, method: str) -> np.ndarray:

363 """

364 Convert Index to ndarray of indices to pass to NumPy/SciPy.

365 """

366 xarr = index._values

367 if needs_i8_conversion(xarr.dtype):

368 # GH#1646 for dt64tz

369 xarr = xarr.view("i8")

370

371 if method == "linear":

372 inds = xarr

373 inds = cast(np.ndarray, inds)

374 else:

375 inds = np.asarray(xarr)

376

377 if method in ("values", "index"):

378 if inds.dtype == np.object_:

379 inds = lib.maybe_convert_objects(inds)

380

381 return inds

382

383

def _interpolate_1d(
    indices: np.ndarray,
    yvalues: np.ndarray,
    method: str | None = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    bounds_error: bool = False,
    order: int | None = None,
    **kwargs,
) -> None:
    """
    Interpolate the missing entries of one 1-d array.

    ``indices`` and ``yvalues`` are 1-d arrays of the same length.

    bounds_error is currently hardcoded to False by the caller since the
    non-scipy methods don't take it as an argument.

    Notes
    -----
    Fills 'yvalues' in-place. Nothing is done when every entry is missing
    or when none is.
    """
    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any() or valid.all():
        return

    # Sets of positions of invalid values, e.g. {0, 1, ...}
    all_nans = set(np.flatnonzero(invalid))

    first_valid_index = find_valid_index(yvalues, how="first", is_valid=valid)
    if first_valid_index is None:  # no nan found in start
        first_valid_index = 0
    start_nans = set(range(first_valid_index))

    last_valid_index = find_valid_index(yvalues, how="last", is_valid=valid)
    if last_valid_index is None:  # no nan found in end
        last_valid_index = len(yvalues)
    end_nans = set(range(1 + last_valid_index, len(valid)))

    # preserve_nans is the final set of positions that must be NaN again
    # once the interpolation has run. With limit_direction='forward', for
    # example, it holds the NaNs at the beginning of the series and the NaNs
    # more than 'limit' away from the prior non-NaN.
    preserve_nans: list | set
    if limit_direction == "forward":
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == "backward":
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # limit_area adds either the outside or the inside NaNs, GH #16284
    if limit_area == "inside":
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == "outside":
        # preserve NaNs on the inside
        preserve_nans |= all_nans - start_nans - end_nans

    # a sorted list is used for the final positional assignment
    preserve_nans = sorted(preserve_nans)

    is_datetimelike = needs_i8_conversion(yvalues.dtype)
    if is_datetimelike:
        yvalues = yvalues.view("i8")

    if method in NP_METHODS:
        # np.interp requires sorted X values, #21037
        known_x = indices[valid]
        sorter = np.argsort(known_x)
        yvalues[invalid] = np.interp(
            indices[invalid], known_x[sorter], yvalues[valid][sorter]
        )
    else:
        yvalues[invalid] = _interpolate_scipy_wrapper(
            indices[valid],
            yvalues[valid],
            indices[invalid],
            method=method,
            fill_value=fill_value,
            bounds_error=bounds_error,
            order=order,
            **kwargs,
        )

    # Restore the positions that must stay missing.
    yvalues[preserve_nans] = NaT.value if is_datetimelike else np.nan

490

491

def _interpolate_scipy_wrapper(
    x,
    y,
    new_x,
    method,
    fill_value=None,
    bounds_error: bool = False,
    order=None,
    **kwargs,
):
    """
    Interpolate ``y`` (known at ``x``) at ``new_x`` using SciPy.

    Depending on ``method`` this uses scipy.interpolate.interp1d (where
    ``method`` is scipy's ``kind``, or ``order`` for "polynomial"),
    UnivariateSpline ("spline"), or one of the alternative interpolators.
    Returns the array interpolated at ``new_x``.

    Any new method must also be added to the module-level SP_METHODS list,
    which clean_interp_method validates against.

    Raises
    ------
    ValueError
        If ``method`` is "spline" and ``order`` is missing or not positive.
    """
    import_optional_dependency(
        "scipy", extra=f"{method} interpolation requires SciPy."
    )
    from scipy import interpolate

    new_x = np.asarray(new_x)

    if getattr(x, "_is_all_dates", False):
        # GH 5975, scipy.interp1d can't handle datetime64s
        x, new_x = x._values.astype("i8"), new_x.astype("i8")

    if method in ("nearest", "zero", "slinear", "quadratic", "cubic", "polynomial"):
        kind = order if method == "polynomial" else method
        interpolator = interpolate.interp1d(
            x, y, kind=kind, fill_value=fill_value, bounds_error=bounds_error
        )
        return interpolator(new_x)

    if method == "spline":
        # GH #10633, #24014
        if isna(order) or (order <= 0):
            raise ValueError(
                f"order needs to be specified and greater than 0; got order: {order}"
            )
        spline = interpolate.UnivariateSpline(x, y, k=order, **kwargs)
        return spline(new_x)

    # Remaining methods; these ignore some kwargs that could be passed along.
    alt_methods = {
        "barycentric": interpolate.barycentric_interpolate,
        "krogh": interpolate.krogh_interpolate,
        "from_derivatives": _from_derivatives,
        "piecewise_polynomial": _from_derivatives,
    }
    # These three are registered only when they are the requested method.
    if method == "pchip":
        alt_methods["pchip"] = interpolate.pchip_interpolate
    elif method == "akima":
        alt_methods["akima"] = _akima_interpolate
    elif method == "cubicspline":
        alt_methods["cubicspline"] = _cubicspline_interpolate

    # GH 7295: need to be able to write for some reason
    # in some circumstances: check all three
    if not x.flags.writeable:
        x = x.copy()
    if not y.flags.writeable:
        y = y.copy()
    if not new_x.flags.writeable:
        new_x = new_x.copy()

    return alt_methods[method](x, y, new_x, **kwargs)

567

568

569def _from_derivatives(

570 xi, yi, x, order=None, der: int | list[int] | None = 0, extrapolate: bool = False

571):

572 """

573 Convenience function for interpolate.BPoly.from_derivatives.

574

575 Construct a piecewise polynomial in the Bernstein basis, compatible

576 with the specified values and derivatives at breakpoints.

577

578 Parameters

579 ----------

580 xi : array-like

581 sorted 1D array of x-coordinates

582 yi : array-like or list of array-likes

583 yi[i][j] is the j-th derivative known at xi[i]

584 order: None or int or array-like of ints. Default: None.

585 Specifies the degree of local polynomials. If not None, some

586 derivatives are ignored.

587 der : int or list

588 How many derivatives to extract; None for all potentially nonzero

589 derivatives (that is a number equal to the number of points), or a

590 list of derivatives to extract. This number includes the function

591 value as 0th derivative.

592 extrapolate : bool, optional

593 Whether to extrapolate to ouf-of-bounds points based on first and last

594 intervals, or to return NaNs. Default: True.

595

596 See Also

597 --------

598 scipy.interpolate.BPoly.from_derivatives

599

600 Returns

601 -------

602 y : scalar or array-like

603 The result, of length R or length M or M by R.

604 """

605 from scipy import interpolate

606

607 # return the method for compat with scipy version & backwards compat

608 method = interpolate.BPoly.from_derivatives

609 m = method(xi, yi.reshape(-1, 1), orders=order, extrapolate=extrapolate)

610

611 return m(x)

612

613

614def _akima_interpolate(xi, yi, x, der: int | list[int] | None = 0, axis: AxisInt = 0):

615 """

616 Convenience function for akima interpolation.

617 xi and yi are arrays of values used to approximate some function f,

618 with ``yi = f(xi)``.

619

620 See `Akima1DInterpolator` for details.

621

622 Parameters

623 ----------

624 xi : array-like

625 A sorted list of x-coordinates, of length N.

626 yi : array-like

627 A 1-D array of real values. `yi`'s length along the interpolation

628 axis must be equal to the length of `xi`. If N-D array, use axis

629 parameter to select correct axis.

630 x : scalar or array-like

631 Of length M.

632 der : int, optional

633 How many derivatives to extract; None for all potentially

634 nonzero derivatives (that is a number equal to the number

635 of points), or a list of derivatives to extract. This number

636 includes the function value as 0th derivative.

637 axis : int, optional

638 Axis in the yi array corresponding to the x-coordinate values.

639

640 See Also

641 --------

642 scipy.interpolate.Akima1DInterpolator

643

644 Returns

645 -------

646 y : scalar or array-like

647 The result, of length R or length M or M by R,

648

649 """

650 from scipy import interpolate

651

652 P = interpolate.Akima1DInterpolator(xi, yi, axis=axis)

653

654 return P(x, nu=der)

655

656

657def _cubicspline_interpolate(

658 xi,

659 yi,

660 x,

661 axis: AxisInt = 0,

662 bc_type: str | tuple[Any, Any] = "not-a-knot",

663 extrapolate=None,

664):

665 """

666 Convenience function for cubic spline data interpolator.

667

668 See `scipy.interpolate.CubicSpline` for details.

669

670 Parameters

671 ----------

672 xi : array-like, shape (n,)

673 1-d array containing values of the independent variable.

674 Values must be real, finite and in strictly increasing order.

675 yi : array-like

676 Array containing values of the dependent variable. It can have

677 arbitrary number of dimensions, but the length along ``axis``

678 (see below) must match the length of ``x``. Values must be finite.

679 x : scalar or array-like, shape (m,)

680 axis : int, optional

681 Axis along which `y` is assumed to be varying. Meaning that for

682 ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.

683 Default is 0.

684 bc_type : string or 2-tuple, optional

685 Boundary condition type. Two additional equations, given by the

686 boundary conditions, are required to determine all coefficients of

687 polynomials on each segment [2]_.

688 If `bc_type` is a string, then the specified condition will be applied

689 at both ends of a spline. Available conditions are:

690 * 'not-a-knot' (default): The first and second segment at a curve end

691 are the same polynomial. It is a good default when there is no

692 information on boundary conditions.

693 * 'periodic': The interpolated functions is assumed to be periodic

694 of period ``x[-1] - x[0]``. The first and last value of `y` must be

695 identical: ``y[0] == y[-1]``. This boundary condition will result in

696 ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``.

697 * 'clamped': The first derivative at curves ends are zero. Assuming

698 a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition.

699 * 'natural': The second derivative at curve ends are zero. Assuming

700 a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition.

701 If `bc_type` is a 2-tuple, the first and the second value will be

702 applied at the curve start and end respectively. The tuple values can

703 be one of the previously mentioned strings (except 'periodic') or a

704 tuple `(order, deriv_values)` allowing to specify arbitrary

705 derivatives at curve ends:

706 * `order`: the derivative order, 1 or 2.

707 * `deriv_value`: array-like containing derivative values, shape must

708 be the same as `y`, excluding ``axis`` dimension. For example, if

709 `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with

710 the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D

711 and have the shape (n0, n1).

712 extrapolate : {bool, 'periodic', None}, optional

713 If bool, determines whether to extrapolate to out-of-bounds points

714 based on first and last intervals, or to return NaNs. If 'periodic',

715 periodic extrapolation is used. If None (default), ``extrapolate`` is

716 set to 'periodic' for ``bc_type='periodic'`` and to True otherwise.

717

718 See Also

719 --------

720 scipy.interpolate.CubicHermiteSpline

721

722 Returns

723 -------

724 y : scalar or array-like

725 The result, of shape (m,)

726

727 References

728 ----------

729 .. [1] `Cubic Spline Interpolation

730 <https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_

731 on Wikiversity.

732 .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978.

733 """

734 from scipy import interpolate

735

736 P = interpolate.CubicSpline(

737 xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate

738 )

739

740 return P(x)

741

742

def _interpolate_with_limit_area(
    values: np.ndarray, method: str, limit: int | None, limit_area: str | None
) -> None:
    """
    Fill ``values`` with interpolate_2d, then put NaN back wherever
    ``limit_area`` forbids filling.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str
        Interpolation method. Could be "bfill" or "pad"
    limit: int, optional
        Index limit on interpolation.
    limit_area: str
        Limit area for interpolation. Can be "inside" or "outside"

    Notes
    -----
    Modifies values in-place. Does nothing when every value is missing.
    """
    invalid = isna(values)
    if invalid.all():
        return
    is_valid = ~invalid

    first = find_valid_index(values, how="first", is_valid=is_valid)
    if first is None:
        first = 0
    last = find_valid_index(values, how="last", is_valid=is_valid)
    if last is None:
        last = len(values)

    interpolate_2d(
        values,
        method=method,
        limit=limit,
    )

    # Positions cleared from `invalid` keep their filled value; the rest of
    # the originally-missing positions are reset to NaN below.
    if limit_area == "inside":
        invalid[first : last + 1] = False
    elif limit_area == "outside":
        invalid[:first] = invalid[last + 1 :] = False

    values[invalid] = np.nan

788

789

def interpolate_2d(
    values: np.ndarray,
    method: str = "pad",
    axis: Axis = 0,
    limit: int | None = None,
    limit_area: str | None = None,
) -> None:
    """
    Pad or backfill ``values`` in place; 1-d input is reshaped to 2-d first.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str, default "pad"
        Interpolation method. Could be "bfill" or "pad"
    axis: 0 or 1
        Interpolation axis
    limit: int, optional
        Index limit on interpolation.
    limit_area: str, optional
        Limit area for interpolation. Can be "inside" or "outside"

    Notes
    -----
    Modifies values in-place and returns None.
    """
    if limit_area is not None:
        # With a limit_area each 1-d slice along `axis` is handled separately.
        per_slice = partial(
            _interpolate_with_limit_area,
            method=method,
            limit=limit,
            limit_area=limit_area,
        )
        # mypy: "partial[None]" is not an array-returning callable, and
        # "Union[str, int]" is not "SupportsIndex".
        np.apply_along_axis(per_slice, axis, values)  # type: ignore[arg-type]
        return

    # reshape a 1 dim if needed
    if values.ndim == 1:
        if axis != 0:  # pragma: no cover
            raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
        values = values.reshape((1,) + values.shape)

    method = clean_fill_method(method)
    tvalues = values if axis == 0 else values.T

    # _pad_2d and _backfill_2d both modify tvalues inplace
    if method == "pad":
        _pad_2d(tvalues, limit=limit)
    else:
        _backfill_2d(tvalues, limit=limit)

859

860

861def _fillna_prep(

862 values, mask: npt.NDArray[np.bool_] | None = None

863) -> npt.NDArray[np.bool_]:

864 # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d

865

866 if mask is None:

867 mask = isna(values)

868

869 mask = mask.view(np.uint8)

870 return mask

871

872

873def _datetimelike_compat(func: F) -> F:

874 """

875 Wrapper to handle datetime64 and timedelta64 dtypes.

876 """

877

878 @wraps(func)

879 def new_func(values, limit=None, mask=None):

880 if needs_i8_conversion(values.dtype):

881 if mask is None:

882 # This needs to occur before casting to int64

883 mask = isna(values)

884

885 result, mask = func(values.view("i8"), limit=limit, mask=mask)

886 return result.view(values.dtype), mask

887

888 return func(values, limit=limit, mask=mask)

889

890 return cast(F, new_func)

891

892

@_datetimelike_compat
def _pad_1d(
    values: np.ndarray,
    limit: int | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    """
    Run ``algos.pad_inplace`` on 1-d ``values``.

    Returns ``values`` together with the uint8 mask from _fillna_prep.
    """
    uint8_mask = _fillna_prep(values, mask)
    algos.pad_inplace(values, uint8_mask, limit=limit)
    return values, uint8_mask

902

903

@_datetimelike_compat
def _backfill_1d(
    values: np.ndarray,
    limit: int | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    """
    Run ``algos.backfill_inplace`` on 1-d ``values``.

    Returns ``values`` together with the uint8 mask from _fillna_prep.
    """
    uint8_mask = _fillna_prep(values, mask)
    algos.backfill_inplace(values, uint8_mask, limit=limit)
    return values, uint8_mask

913

914

@_datetimelike_compat
def _pad_2d(values: np.ndarray, limit=None, mask: npt.NDArray[np.bool_] | None = None):
    """
    Run ``algos.pad_2d_inplace`` on 2-d ``values``.

    The fill is skipped when any dimension of ``values`` has length zero.
    Returns ``values`` together with the uint8 mask from _fillna_prep.
    """
    mask = _fillna_prep(values, mask)

    if not np.all(values.shape):
        # nothing to fill in an array with a zero-length dimension
        return values, mask

    algos.pad_2d_inplace(values, mask, limit=limit)
    return values, mask

925

926

@_datetimelike_compat
def _backfill_2d(values, limit=None, mask: npt.NDArray[np.bool_] | None = None):
    """
    Run ``algos.backfill_2d_inplace`` on 2-d ``values``.

    The fill is skipped when any dimension of ``values`` has length zero.
    Returns ``values`` together with the uint8 mask from _fillna_prep.
    """
    mask = _fillna_prep(values, mask)

    if not np.all(values.shape):
        # nothing to fill in an array with a zero-length dimension
        return values, mask

    algos.backfill_2d_inplace(values, mask, limit=limit)
    return values, mask

937

938

# 1-d fill functions keyed by normalized method name.
_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}


def get_fill_func(method, ndim: int = 1):
    """
    Return the pad/backfill function for ``method``.

    ``method`` is normalized with clean_fill_method. ``ndim == 1`` selects
    the 1-d functions; any other value selects the 2-d ones.
    """
    method = clean_fill_method(method)
    if ndim == 1:
        table = _fill_methods
    else:
        table = {"pad": _pad_2d, "backfill": _backfill_2d}
    return table[method]

947

948

def clean_reindex_fill_method(method) -> str | None:
    """
    Normalize a reindex fill method; "nearest" is accepted in addition to
    the names clean_fill_method accepts by default.
    """
    cleaned = clean_fill_method(method, allow_nearest=True)
    return cleaned

951

952

def _interp_limit(invalid: npt.NDArray[np.bool_], fw_limit, bw_limit):
    """
    Get indexers of values that won't be filled
    because they exceed the limits.

    Parameters
    ----------
    invalid : np.ndarray[bool]
    fw_limit : int or None
        forward limit to index
    bw_limit : int or None
        backward limit to index

    Returns
    -------
    set of indexers

    Notes
    -----
    This is equivalent to the more readable, but slower

    .. code-block:: python

        def _interp_limit(invalid, fw_limit, bw_limit):
            for x in np.where(invalid)[0]:
                if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                    yield x
    """
    # The forward direction is handled directly; the backward direction is
    # the same except that it
    # 1. operates on the reversed array
    # 2. subtracts the returned indices from N - 1
    N = len(invalid)

    def _exceeding(flags, limit):
        limit = min(limit, N)
        # positions that close a window of limit + 1 consecutive invalids
        full_windows = _rolling_window(flags, limit + 1).all(1)
        beyond = np.where(full_windows)[0] + limit
        # invalid positions ahead of the first valid one among the first
        # limit + 1 entries
        leading = np.where((~flags[: limit + 1]).cumsum() == 0)[0]
        return set(beyond) | set(leading)

    if fw_limit is None:
        f_idx = set()
    elif fw_limit == 0:
        f_idx = set(np.where(invalid)[0])
    else:
        f_idx = _exceeding(invalid, fw_limit)

    b_idx = set()
    if bw_limit is not None:
        if bw_limit == 0:
            # then we don't even need to care about backwards
            # just use forwards
            return f_idx

        reversed_hits = list(_exceeding(invalid[::-1], bw_limit))
        b_idx = set(N - 1 - np.asarray(reversed_hits))
        if fw_limit == 0:
            return b_idx

    return f_idx & b_idx

1014

1015

1016def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.bool_]:

1017 """

1018 [True, True, False, True, False], 2 ->

1019

1020 [

1021 [True, True],

1022 [True, False],

1023 [False, True],

1024 [True, False],

1025 ]

1026 """

1027 # https://stackoverflow.com/a/6811241

1028 shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)

1029 strides = a.strides + (a.strides[-1],)

1030 return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)