from __future__ import annotations

from functools import wraps
import inspect
import re
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
    cast,
    final,
)
import warnings
import weakref

import numpy as np

from pandas._config import (
    get_option,
    using_copy_on_write,
    warn_copy_on_write,
)

from pandas._libs import (
    NaT,
    internals as libinternals,
    lib,
)
from pandas._libs.internals import (
    BlockPlacement,
    BlockValuesRefs,
)
from pandas._libs.missing import NA
from pandas._typing import (
    ArrayLike,
    AxisInt,
    DtypeBackend,
    DtypeObj,
    F,
    FillnaOptions,
    IgnoreRaise,
    InterpolateOptions,
    QuantileInterpolation,
    Self,
    Shape,
    npt,
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.astype import (
    astype_array_safe,
    astype_is_view,
)
from pandas.core.dtypes.cast import (
    LossySetitemError,
    can_hold_element,
    convert_dtypes,
    find_result_type,
    maybe_downcast_to_dtype,
    np_can_hold_element,
)
from pandas.core.dtypes.common import (
    is_1d_only_ea_dtype,
    is_float_dtype,
    is_integer_dtype,
    is_list_like,
    is_scalar,
    is_string_dtype,
)
from pandas.core.dtypes.dtypes import (
    DatetimeTZDtype,
    ExtensionDtype,
    IntervalDtype,
    NumpyEADtype,
    PeriodDtype,
)
from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCIndex,
    ABCNumpyExtensionArray,
    ABCSeries,
)
from pandas.core.dtypes.missing import (
    is_valid_na_for_dtype,
    isna,
    na_value_for_dtype,
)

from pandas.core import missing
import pandas.core.algorithms as algos
from pandas.core.array_algos.putmask import (
    extract_bool_array,
    putmask_inplace,
    putmask_without_repeat,
    setitem_datetimelike_compat,
    validate_putmask,
)
from pandas.core.array_algos.quantile import quantile_compat
from pandas.core.array_algos.replace import (
    compare_or_regex_search,
    replace_regex,
    should_use_regex,
)
from pandas.core.array_algos.transforms import shift
from pandas.core.arrays import (
    Categorical,
    DatetimeArray,
    ExtensionArray,
    IntervalArray,
    NumpyExtensionArray,
    PeriodArray,
    TimedeltaArray,
)
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.computation import expressions
from pandas.core.construction import (
    ensure_wrapped_if_datetimelike,
    extract_array,
)
from pandas.core.indexers import check_setitem_lengths
from pandas.core.indexes.base import get_values_for_csv

if TYPE_CHECKING:
    from collections.abc import (
        Iterable,
        Sequence,
    )

    from pandas.core.api import Index
    from pandas.core.arrays._mixins import NDArrayBackedExtensionArray

# comparison is faster than is_object_dtype
_dtype_obj = np.dtype("object")


COW_WARNING_GENERAL_MSG = """\
Setting a value on a view: behaviour will change in pandas 3.0.
You are mutating a Series or DataFrame object, and currently this mutation will
also have effect on other Series or DataFrame objects that share data with this
object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object
will never modify another.
"""


COW_WARNING_SETITEM_MSG = """\
Setting a value on a view: behaviour will change in pandas 3.0.
Currently, the mutation will also have effect on the object that shares data
with this object. For example, when setting a value in a Series that was
extracted from a column of a DataFrame, that DataFrame will also be updated:

    ser = df["col"]
    ser[0] = 0 <--- in pandas 2, this also updates `df`

In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never
modify another, and thus in the example above, `df` will not be changed.
"""


def maybe_split(meth: F) -> F:
    """
    If we have a multi-column block, split and operate block-wise. Otherwise
    use the original method.
    """

    @wraps(meth)
    def newfunc(self, *args, **kwargs) -> list[Block]:
        if self.ndim == 1 or self.shape[0] == 1:
            return meth(self, *args, **kwargs)
        else:
            # Split and operate column-by-column
            return self.split_and_operate(meth, *args, **kwargs)

    return cast(F, newfunc)

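# A minimal sketch (hypothetical, not part of pandas' API) of what the
# decorator above does for a multi-column block: the wrapped method only ever
# sees single-column (or 1D) blocks, and the per-column results are collected
# into one list.
#
#     @maybe_split
#     def _op(self, *args, **kwargs) -> list[Block]:
#         ...                       # here self.ndim == 1 or self.shape[0] == 1
#
#     # blk.shape == (3, n)  ->  blk._op() == [result_col0, result_col1, ...]
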

class Block(PandasObject, libinternals.Block):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure

    Index-ignorant; let the container take care of that
    """

    values: np.ndarray | ExtensionArray
    ndim: int
    refs: BlockValuesRefs
    __init__: Callable

    __slots__ = ()
    is_numeric = False

    @final
    @cache_readonly
    def _validate_ndim(self) -> bool:
        """
        We validate dimension for blocks that can hold 2D values, which for now
        means numpy dtypes or DatetimeTZDtype.
        """
        dtype = self.dtype
        return not isinstance(dtype, ExtensionDtype) or isinstance(
            dtype, DatetimeTZDtype
        )

    @final
    @cache_readonly
    def is_object(self) -> bool:
        return self.values.dtype == _dtype_obj

    @final
    @cache_readonly
    def is_extension(self) -> bool:
        return not lib.is_np_dtype(self.values.dtype)

    @final
    @cache_readonly
    def _can_consolidate(self) -> bool:
        # We _could_ consolidate for DatetimeTZDtype but don't for now.
        return not self.is_extension

    @final
    @cache_readonly
    def _consolidate_key(self):
        return self._can_consolidate, self.dtype.name

    @final
    @cache_readonly
    def _can_hold_na(self) -> bool:
        """
        Can we store NA values in this Block?
        """
        dtype = self.dtype
        if isinstance(dtype, np.dtype):
            return dtype.kind not in "iub"
        return dtype._can_hold_na

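    # Illustration of the kind-based rule above (numpy dtype kinds are a fact
    # of numpy itself): "i" (signed int), "u" (unsigned int), and "b" (bool)
    # cannot represent NA, everything else can.
    #
    #     np.dtype("int64").kind    # "i"  -> _can_hold_na is False
    #     np.dtype("bool").kind     # "b"  -> _can_hold_na is False
    #     np.dtype("float64").kind  # "f"  -> _can_hold_na is True
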
    @final
    @property
    def is_bool(self) -> bool:
        """
        We can be bool if a) we are bool dtype or b) object dtype with bool objects.
        """
        return self.values.dtype == np.dtype(bool)

    @final
    def external_values(self):
        return external_values(self.values)

    @final
    @cache_readonly
    def fill_value(self):
        # Used in reindex_indexer
        return na_value_for_dtype(self.dtype, compat=False)

    @final
    def _standardize_fill_value(self, value):
        # if we are passed a scalar None, convert it here
        if self.dtype != _dtype_obj and is_valid_na_for_dtype(value, self.dtype):
            value = self.fill_value
        return value

    @property
    def mgr_locs(self) -> BlockPlacement:
        return self._mgr_locs

    @mgr_locs.setter
    def mgr_locs(self, new_mgr_locs: BlockPlacement) -> None:
        self._mgr_locs = new_mgr_locs

    @final
    def make_block(
        self,
        values,
        placement: BlockPlacement | None = None,
        refs: BlockValuesRefs | None = None,
    ) -> Block:
        """
        Create a new block, with type inference, propagating any values that
        are not specified.
        """
        if placement is None:
            placement = self._mgr_locs
        if self.is_extension:
            values = ensure_block_shape(values, ndim=self.ndim)

        return new_block(values, placement=placement, ndim=self.ndim, refs=refs)

    @final
    def make_block_same_class(
        self,
        values,
        placement: BlockPlacement | None = None,
        refs: BlockValuesRefs | None = None,
    ) -> Self:
        """Wrap given values in a block of same type as self."""
        # Pre-2.0 we called ensure_wrapped_if_datetimelike because fastparquet
        # relied on it, as of 2.0 the caller is responsible for this.
        if placement is None:
            placement = self._mgr_locs

        # We assume maybe_coerce_values has already been called
        return type(self)(values, placement=placement, ndim=self.ndim, refs=refs)

    @final
    def __repr__(self) -> str:
        # don't want to print out all of the items here
        name = type(self).__name__
        if self.ndim == 1:
            result = f"{name}: {len(self)} dtype: {self.dtype}"
        else:
            shape = " x ".join([str(s) for s in self.shape])
            result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}"

        return result

    @final
    def __len__(self) -> int:
        return len(self.values)

    @final
    def slice_block_columns(self, slc: slice) -> Self:
        """
        Perform __getitem__-like, return result as block.
        """
        new_mgr_locs = self._mgr_locs[slc]

        new_values = self._slice(slc)
        refs = self.refs
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)

    @final
    def take_block_columns(self, indices: npt.NDArray[np.intp]) -> Self:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        # Note: only called from internals.concat, and we can verify
        # that never happens with 1-column blocks, i.e. never for ExtensionBlock.

        new_mgr_locs = self._mgr_locs[indices]

        new_values = self._slice(indices)
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=None)

    @final
    def getitem_block_columns(
        self, slicer: slice, new_mgr_locs: BlockPlacement, ref_inplace_op: bool = False
    ) -> Self:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        new_values = self._slice(slicer)
        refs = self.refs if not ref_inplace_op or self.refs.has_reference() else None
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)

    @final
    def _can_hold_element(self, element: Any) -> bool:
        """require the same dtype as ourselves"""
        element = extract_array(element, extract_numpy=True)
        return can_hold_element(self.values, element)

    @final
    def should_store(self, value: ArrayLike) -> bool:
        """
        Should we set self.values[indexer] = value inplace or do we need to cast?

        Parameters
        ----------
        value : np.ndarray or ExtensionArray

        Returns
        -------
        bool
        """
        return value.dtype == self.dtype

    # ---------------------------------------------------------------------
    # Apply/Reduce and Helpers

    @final
    def apply(self, func, **kwargs) -> list[Block]:
        """
        Apply the function to my values; return the result as a list of
        blocks, splitting if necessary.
        """
        result = func(self.values, **kwargs)

        result = maybe_coerce_values(result)
        return self._split_op_result(result)

    @final
    def reduce(self, func) -> list[Block]:
        # We will apply the function and reshape the result into a single-row
        # Block with the same mgr_locs; squeezing will be done at a higher level
        assert self.ndim == 2

        result = func(self.values)

        if self.values.ndim == 1:
            res_values = result
        else:
            res_values = result.reshape(-1, 1)

        nb = self.make_block(res_values)
        return [nb]

    @final
    def _split_op_result(self, result: ArrayLike) -> list[Block]:
        # See also: split_and_operate
        if result.ndim > 1 and isinstance(result.dtype, ExtensionDtype):
            # TODO(EA2D): unnecessary with 2D EAs
            # if we get a 2D ExtensionArray, we need to split it into 1D pieces
            nbs = []
            for i, loc in enumerate(self._mgr_locs):
                if not is_1d_only_ea_dtype(result.dtype):
                    vals = result[i : i + 1]
                else:
                    vals = result[i]

                bp = BlockPlacement(loc)
                block = self.make_block(values=vals, placement=bp)
                nbs.append(block)
            return nbs

        nb = self.make_block(result)

        return [nb]

    @final
    def _split(self) -> list[Block]:
        """
        Split a block into a list of single-column blocks.
        """
        assert self.ndim == 2

        new_blocks = []
        for i, ref_loc in enumerate(self._mgr_locs):
            vals = self.values[slice(i, i + 1)]

            bp = BlockPlacement(ref_loc)
            nb = type(self)(vals, placement=bp, ndim=2, refs=self.refs)
            new_blocks.append(nb)
        return new_blocks

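    # A hedged sketch (hypothetical shapes) of what _split produces: each
    # single-column block views the same underlying 2D array, which is why
    # refs=self.refs is passed above.
    #
    #     blk.shape                              # (3, n)
    #     [nb.shape for nb in blk._split()]      # [(1, n), (1, n), (1, n)]
    #     # each nb.mgr_locs holds one of blk's original manager locations
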
    @final
    def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
        """
        Split the block and apply func column-by-column.

        Parameters
        ----------
        func : Block method
        *args
        **kwargs

        Returns
        -------
        List[Block]
        """
        assert self.ndim == 2 and self.shape[0] != 1

        res_blocks = []
        for nb in self._split():
            rbs = func(nb, *args, **kwargs)
            res_blocks.extend(rbs)
        return res_blocks

    # ---------------------------------------------------------------------
    # Up/Down-casting

    @final
    def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
        """
        Coerce the current block to a dtype compatible with `other`.

        We will return a block, possibly of object dtype, and will not raise.
        We can also safely try to coerce to the same dtype and will receive
        the same block.
        """
        new_dtype = find_result_type(self.values.dtype, other)
        if new_dtype == self.dtype:
            # GH#52927 avoid RecursionError
            raise AssertionError(
                "Something has gone wrong, please report a bug at "
                "https://github.com/pandas-dev/pandas/issues"
            )

        # In a future version of pandas, the default will be that
        # setting `nan` into an integer series won't raise.
        if (
            is_scalar(other)
            and is_integer_dtype(self.values.dtype)
            and isna(other)
            and other is not NaT
            and not (
                isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
            )
        ):
            warn_on_upcast = False
        elif (
            isinstance(other, np.ndarray)
            and other.ndim == 1
            and is_integer_dtype(self.values.dtype)
            and is_float_dtype(other.dtype)
            and lib.has_only_ints_or_nan(other)
        ):
            warn_on_upcast = False

        if warn_on_upcast:
            warnings.warn(
                "Setting an item of incompatible dtype is deprecated "
                "and will raise an error in a future version of pandas. "
                f"Value '{other}' has dtype incompatible with {self.values.dtype}, "
                "please explicitly cast to a compatible dtype first.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        if self.values.dtype == new_dtype:
            raise AssertionError(
                f"Did not expect new dtype {new_dtype} to equal self.dtype "
                f"{self.values.dtype}. Please report a bug at "
                "https://github.com/pandas-dev/pandas/issues."
            )
        return self.astype(new_dtype, copy=False)

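    # A hedged, user-level illustration of the upcast path above (pandas 2.x;
    # exact warning text may vary between versions):
    #
    #     ser = pd.Series([1, 2, 3])   # int64 block
    #     ser[0] = "a"                 # int64 cannot hold "a"
    #     # -> FutureWarning: Setting an item of incompatible dtype ...
    #     # ser.dtype is now object, via coerce_to_target_dtype
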
    @final
    def _maybe_downcast(
        self,
        blocks: list[Block],
        downcast,
        using_cow: bool,
        caller: str,
    ) -> list[Block]:
        if downcast is False:
            return blocks

        if self.dtype == _dtype_obj:
            # TODO: does it matter that self.dtype might not match blocks[i].dtype?
            # GH#44241 We downcast regardless of the argument;
            # respecting 'downcast=None' may be worthwhile at some point,
            # but ATM it breaks too much existing code.
            # split and convert the blocks

            if caller == "fillna" and get_option("future.no_silent_downcasting"):
                return blocks

            nbs = extend_blocks(
                [blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
            )
            if caller == "fillna":
                if len(nbs) != len(blocks) or not all(
                    x.dtype == y.dtype for x, y in zip(nbs, blocks)
                ):
                    # GH#54261
                    warnings.warn(
                        "Downcasting object dtype arrays on .fillna, .ffill, .bfill "
                        "is deprecated and will change in a future version. "
                        "Call result.infer_objects(copy=False) instead. "
                        "To opt-in to the future "
                        "behavior, set "
                        "`pd.set_option('future.no_silent_downcasting', True)`",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )

            return nbs

        elif downcast is None:
            return blocks
        elif caller == "where" and get_option("future.no_silent_downcasting") is True:
            return blocks
        else:
            nbs = extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks])

        # When _maybe_downcast is called with caller="where", it is either
        # a) with downcast=False, which is a no-op (the desired future behavior)
        # b) with downcast="infer", which is _not_ passed by the user.
        # In the latter case the future behavior is to stop doing inference,
        # so we issue a warning if and only if some inference occurred.
        if caller == "where":
            # GH#53656
            if len(blocks) != len(nbs) or any(
                left.dtype != right.dtype for left, right in zip(blocks, nbs)
            ):
                # In this case _maybe_downcast was _not_ a no-op, so the behavior
                # will change, so we issue a warning.
                warnings.warn(
                    "Downcasting behavior in Series and DataFrame methods 'where', "
                    "'mask', and 'clip' is deprecated. In a future "
                    "version this will not infer object dtypes or cast all-round "
                    "floats to integers. Instead call "
                    "result.infer_objects(copy=False) for object inference, "
                    "or cast round floats explicitly. To opt-in to the future "
                    "behavior, set "
                    "`pd.set_option('future.no_silent_downcasting', True)`",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

        return nbs

    @final
    @maybe_split
    def _downcast_2d(self, dtype, using_cow: bool = False) -> list[Block]:
        """
        downcast specialized to 2D case post-validation.

        Refactored to allow use of maybe_split.
        """
        new_values = maybe_downcast_to_dtype(self.values, dtype=dtype)
        new_values = maybe_coerce_values(new_values)
        refs = self.refs if new_values is self.values else None
        return [self.make_block(new_values, refs=refs)]

    @final
    def convert(
        self,
        *,
        copy: bool = True,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        Attempt to coerce any object types to better types. Return a copy
        of the block (if copy = True).
        """
        if not self.is_object:
            if not copy and using_cow:
                return [self.copy(deep=False)]
            return [self.copy()] if copy else [self]

        if self.ndim != 1 and self.shape[0] != 1:
            blocks = self.split_and_operate(
                Block.convert, copy=copy, using_cow=using_cow
            )
            if all(blk.dtype.kind == "O" for blk in blocks):
                # Avoid fragmenting the block if convert is a no-op
                if using_cow:
                    return [self.copy(deep=False)]
                return [self.copy()] if copy else [self]
            return blocks

        values = self.values
        if values.ndim == 2:
            # the check above ensures we only get here with values.shape[0] == 1,
            # avoid doing .ravel as that might make a copy
            values = values[0]

        res_values = lib.maybe_convert_objects(
            values,  # type: ignore[arg-type]
            convert_non_numeric=True,
        )
        refs = None
        if copy and res_values is values:
            res_values = values.copy()
        elif res_values is values:
            refs = self.refs

        res_values = ensure_block_shape(res_values, self.ndim)
        res_values = maybe_coerce_values(res_values)
        return [self.make_block(res_values, refs=refs)]

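    # A hedged public-API mirror of ``convert``: object-dtype inference is
    # what DataFrame/Series.infer_objects exposes.
    #
    #     pd.Series([1, 2, 3], dtype=object).infer_objects().dtype
    #     # -> int64; lib.maybe_convert_objects did the work
    #     pd.Series(["a", "b"], dtype=object).infer_objects().dtype
    #     # -> unchanged (object); convert is a no-op and no split happens
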
    def convert_dtypes(
        self,
        copy: bool,
        using_cow: bool,
        infer_objects: bool = True,
        convert_string: bool = True,
        convert_integer: bool = True,
        convert_boolean: bool = True,
        convert_floating: bool = True,
        dtype_backend: DtypeBackend = "numpy_nullable",
    ) -> list[Block]:
        if infer_objects and self.is_object:
            blks = self.convert(copy=False, using_cow=using_cow)
        else:
            blks = [self]

        if not any(
            [convert_floating, convert_integer, convert_boolean, convert_string]
        ):
            return [b.copy(deep=copy) for b in blks]

        rbs = []
        for blk in blks:
            # Determine dtype column by column
            sub_blks = [blk] if blk.ndim == 1 or self.shape[0] == 1 else blk._split()
            dtypes = [
                convert_dtypes(
                    b.values,
                    convert_string,
                    convert_integer,
                    convert_boolean,
                    convert_floating,
                    infer_objects,
                    dtype_backend,
                )
                for b in sub_blks
            ]
            if all(dtype == self.dtype for dtype in dtypes):
                # Avoid block splitting if no dtype changes
                rbs.append(blk.copy(deep=copy))
                continue

            for dtype, b in zip(dtypes, sub_blks):
                rbs.append(b.astype(dtype=dtype, copy=copy, squeeze=b.ndim != 1))
        return rbs

    # ---------------------------------------------------------------------
    # Array-Like Methods

    @final
    @cache_readonly
    def dtype(self) -> DtypeObj:
        return self.values.dtype

    @final
    def astype(
        self,
        dtype: DtypeObj,
        copy: bool = False,
        errors: IgnoreRaise = "raise",
        using_cow: bool = False,
        squeeze: bool = False,
    ) -> Block:
        """
        Coerce to the new dtype.

        Parameters
        ----------
        dtype : np.dtype or ExtensionDtype
        copy : bool, default False
            copy if indicated
        errors : str, {'raise', 'ignore'}, default 'raise'
            - ``raise`` : allow exceptions to be raised
            - ``ignore`` : suppress exceptions. On error return original object
        using_cow : bool, default False
            Signals whether copy-on-write logic is used.
        squeeze : bool, default False
            squeeze values to ndim=1 if only one column is given

        Returns
        -------
        Block
        """
        values = self.values
        if squeeze and values.ndim == 2 and is_1d_only_ea_dtype(dtype):
            if values.shape[0] != 1:
                raise ValueError("Can not squeeze with more than one column.")
            values = values[0, :]  # type: ignore[call-overload]

        new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)

        new_values = maybe_coerce_values(new_values)

        refs = None
        if (using_cow or not copy) and astype_is_view(values.dtype, new_values.dtype):
            refs = self.refs

        newb = self.make_block(new_values, refs=refs)
        if newb.shape != self.shape:
            raise TypeError(
                f"cannot set astype for copy = [{copy}] for dtype "
                f"({self.dtype.name} [{self.shape}]) to different shape "
                f"({newb.dtype.name} [{newb.shape}])"
            )
        return newb

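    # A hedged note on the refs logic above: when a cast can be performed as a
    # view (astype_is_view), the new block keeps a reference to the parent's
    # values so Copy-on-Write can detect the sharing later.
    #
    #     ser = pd.Series([1, 2, 3])
    #     same = ser.astype("int64")  # view-compatible cast; under CoW the
    #                                 # result shares memory until mutated
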
    @final
    def get_values_for_csv(
        self, *, float_format, date_format, decimal, na_rep: str = "nan", quoting=None
    ) -> Block:
        """convert to our native types format"""
        result = get_values_for_csv(
            self.values,
            na_rep=na_rep,
            quoting=quoting,
            float_format=float_format,
            date_format=date_format,
            decimal=decimal,
        )
        return self.make_block(result)

    @final
    def copy(self, deep: bool = True) -> Self:
        """copy constructor"""
        values = self.values
        refs: BlockValuesRefs | None
        if deep:
            values = values.copy()
            refs = None
        else:
            refs = self.refs
        return type(self)(values, placement=self._mgr_locs, ndim=self.ndim, refs=refs)

    # ---------------------------------------------------------------------
    # Copy-on-Write Helpers

    @final
    def _maybe_copy(self, using_cow: bool, inplace: bool) -> Self:
        if using_cow and inplace:
            deep = self.refs.has_reference()
            blk = self.copy(deep=deep)
        else:
            blk = self if inplace else self.copy()
        return blk

    @final
    def _get_refs_and_copy(self, using_cow: bool, inplace: bool):
        refs = None
        copy = not inplace
        if inplace:
            if using_cow and self.refs.has_reference():
                copy = True
            else:
                refs = self.refs
        return copy, refs

    # ---------------------------------------------------------------------
    # Replace

    @final
    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        # mask may be pre-computed if we're called from replace_list
        mask: npt.NDArray[np.bool_] | None = None,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        """
        Replace the to_replace value with value; it is possible to create new
        blocks here. When the value fits the current dtype, this is just a
        call to putmask.
        """

        # Note: the checks we do in NDFrame.replace ensure we never get
        # here with listlike to_replace or value, as those cases
        # go through replace_list
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            # GH49404
            blk = self._maybe_copy(using_cow, inplace)
            values = cast(Categorical, blk.values)
            values._replace(to_replace=to_replace, value=value, inplace=True)
            return [blk]

        if not self._can_hold_element(to_replace):
            # We cannot hold `to_replace`, so we know immediately that
            # replacing it is a no-op.
            # Note: If to_replace were a list, NDFrame.replace would call
            # replace_list instead of replace.
            if using_cow:
                return [self.copy(deep=False)]
            else:
                return [self] if inplace else [self.copy()]

        if mask is None:
            mask = missing.mask_missing(values, to_replace)
        if not mask.any():
            # Note: we get here with test_replace_extension_other incorrectly
            # bc _can_hold_element is incorrect.
            if using_cow:
                return [self.copy(deep=False)]
            else:
                return [self] if inplace else [self.copy()]

        elif self._can_hold_element(value):
            # TODO(CoW): Maybe split here as well into columns where mask has True
            # and rest?
            blk = self._maybe_copy(using_cow, inplace)
            putmask_inplace(blk.values, mask, value)
            if (
                inplace
                and warn_copy_on_write()
                and already_warned is not None
                and not already_warned.warned_already
            ):
                if self.refs.has_reference():
                    warnings.warn(
                        COW_WARNING_GENERAL_MSG,
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
                already_warned.warned_already = True

            if not (self.is_object and value is None):
                # if the user *explicitly* gave None, we keep None, otherwise
                # may downcast to NaN
                if get_option("future.no_silent_downcasting") is True:
                    blocks = [blk]
                else:
                    blocks = blk.convert(copy=False, using_cow=using_cow)
                    if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
                        warnings.warn(
                            # GH#54710
                            "Downcasting behavior in `replace` is deprecated and "
                            "will be removed in a future version. To retain the old "
                            "behavior, explicitly call "
                            "`result.infer_objects(copy=False)`. "
                            "To opt-in to the future "
                            "behavior, set "
                            "`pd.set_option('future.no_silent_downcasting', True)`",
                            FutureWarning,
                            stacklevel=find_stack_level(),
                        )
            else:
                blocks = [blk]
            return blocks

        elif self.ndim == 1 or self.shape[0] == 1:
            if value is None or value is NA:
                blk = self.astype(np.dtype(object))
            else:
                blk = self.coerce_to_target_dtype(value)
            return blk.replace(
                to_replace=to_replace,
                value=value,
                inplace=True,
                mask=mask,
            )

        else:
            # split so that we only upcast where necessary
            blocks = []
            for i, nb in enumerate(self._split()):
                blocks.extend(
                    type(self).replace(
                        nb,
                        to_replace=to_replace,
                        value=value,
                        inplace=True,
                        mask=mask[i : i + 1],
                        using_cow=using_cow,
                    )
                )
            return blocks

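    # A hedged, user-level map of the branches above (pandas 2.x):
    #
    #     ser = pd.Series([1, 2, 3])
    #     ser.replace(2, 0)     # value fits int64 -> putmask_inplace branch
    #     ser.replace(2, "a")   # value does not fit -> upcast, then recurse
    #     ser.replace("x", 0)   # int64 cannot hold "x" -> no-op fast path
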
    @final
    def _replace_regex(
        self,
        to_replace,
        value,
        inplace: bool = False,
        mask=None,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        """
        Replace elements by the given value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        inplace : bool, default False
            Perform inplace modification.
        mask : array-like of bool, optional
            True indicates the corresponding element is ignored.
        using_cow : bool, default False
            Whether copy-on-write is enabled.

        Returns
        -------
        List[Block]
        """
        if not self._can_hold_element(to_replace):
            # i.e. only if self.is_object is True, but could in principle include a
            # String ExtensionBlock
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        rx = re.compile(to_replace)

        block = self._maybe_copy(using_cow, inplace)

        replace_regex(block.values, rx, value, mask)

        if (
            inplace
            and warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            already_warned.warned_already = True

        nbs = block.convert(copy=False, using_cow=using_cow)
        opt = get_option("future.no_silent_downcasting")
        if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
            warnings.warn(
                # GH#54710
                "Downcasting behavior in `replace` is deprecated and "
                "will be removed in a future version. To retain the old "
                "behavior, explicitly call `result.infer_objects(copy=False)`. "
                "To opt-in to the future "
                "behavior, set "
                "`pd.set_option('future.no_silent_downcasting', True)`",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        return nbs

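    # A hedged public-API mirror of the regex path (pandas 2.x):
    #
    #     ser = pd.Series(["foo", "bar", "baz"])
    #     ser.replace(r"^ba.$", "qux", regex=True)   # -> ["foo", "qux", "qux"]
    #     # non-object blocks cannot hold the pattern and take the no-op
    #     # branch at the top of this method
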
    @final
    def replace_list(
        self,
        src_list: Iterable[Any],
        dest_list: Sequence[Any],
        inplace: bool = False,
        regex: bool = False,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        """
        See BlockManager.replace_list docstring.
        """
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            # GH49404
            blk = self._maybe_copy(using_cow, inplace)
            values = cast(Categorical, blk.values)
            values._replace(to_replace=src_list, value=dest_list, inplace=True)
            return [blk]

        # Exclude anything that we know we won't contain
        pairs = [
            (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
        ]
        if not len(pairs):
            if using_cow:
                return [self.copy(deep=False)]
            # shortcut, nothing to replace
            return [self] if inplace else [self.copy()]

        src_len = len(pairs) - 1

        if is_string_dtype(values.dtype):
            # Calculate the mask once, prior to the call of comp
            # in order to avoid repeating the same computations
            na_mask = ~isna(values)
            masks: Iterable[npt.NDArray[np.bool_]] = (
                extract_bool_array(
                    cast(
                        ArrayLike,
                        compare_or_regex_search(
                            values, s[0], regex=regex, mask=na_mask
                        ),
                    )
                )
                for s in pairs
            )
        else:
            # GH#38086 faster if we know we don't need to check for regex
            masks = (missing.mask_missing(values, s[0]) for s in pairs)
        # Materialize if inplace = True, since the masks can change
        # as we replace
        if inplace:
            masks = list(masks)

        if using_cow:
            # Don't set up refs here, otherwise we will think that we have
            # references when we check again later
            rb = [self]
        else:
            rb = [self if inplace else self.copy()]

        if (
            inplace
            and warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            already_warned.warned_already = True

        opt = get_option("future.no_silent_downcasting")
        for i, ((src, dest), mask) in enumerate(zip(pairs, masks)):
            convert = i == src_len  # only convert once at the end
            new_rb: list[Block] = []

            # GH-39338: _replace_coerce can split a block into
            # single-column blocks, so track the index so we know
            # where to index into the mask
            for blk_num, blk in enumerate(rb):
                if len(rb) == 1:
                    m = mask
                else:
                    mib = mask
                    assert not isinstance(mib, bool)
                    m = mib[blk_num : blk_num + 1]

                # error: Argument "mask" to "_replace_coerce" of "Block" has
                # incompatible type "Union[ExtensionArray, ndarray[Any, Any], bool]";
                # expected "ndarray[Any, dtype[bool_]]"
                result = blk._replace_coerce(
                    to_replace=src,
                    value=dest,
                    mask=m,
                    inplace=inplace,
                    regex=regex,
                    using_cow=using_cow,
                )

                if using_cow and i != src_len:
                    # This is ugly, but we have to get rid of intermediate refs
                    # that did not go out of scope yet, otherwise we will trigger
                    # many unnecessary copies
                    for b in result:
                        ref = weakref.ref(b)
                        b.refs.referenced_blocks.pop(
                            b.refs.referenced_blocks.index(ref)
                        )

                if (
                    not opt
                    and convert
                    and blk.is_object
                    and not all(x is None for x in dest_list)
                ):
                    # GH#44498 avoid unwanted cast-back
                    nbs = []
                    for res_blk in result:
                        converted = res_blk.convert(
                            copy=True and not using_cow, using_cow=using_cow
                        )
                        if len(converted) > 1 or converted[0].dtype != res_blk.dtype:
                            warnings.warn(
                                # GH#54710
                                "Downcasting behavior in `replace` is deprecated "
                                "and will be removed in a future version. To "
                                "retain the old behavior, explicitly call "
                                "`result.infer_objects(copy=False)`. "
                                "To opt-in to the future "
                                "behavior, set "
                                "`pd.set_option('future.no_silent_downcasting', True)`",
                                FutureWarning,
                                stacklevel=find_stack_level(),
                            )
                        nbs.extend(converted)
                    result = nbs
                new_rb.extend(result)
            rb = new_rb
        return rb

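    # A hedged usage sketch of the list path (pandas 2.x public API):
    #
    #     ser = pd.Series([1, 2, 3])
    #     ser.replace([1, 2], [10, 20])   # pairs == [(1, 10), (2, 20)]
    #     # each (src, dest) pair runs through _replace_coerce in order, and
    #     # object-dtype conversion is deferred to the last pair (i == src_len)
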
    @final
    def _replace_coerce(
        self,
        to_replace,
        value,
        mask: npt.NDArray[np.bool_],
        inplace: bool = True,
        regex: bool = False,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        Replace value corresponding to the given boolean array with another
        value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        mask : np.ndarray[bool]
            True indicates the corresponding element is ignored.
        inplace : bool, default True
            Perform inplace modification.
        regex : bool, default False
            If true, perform regular expression substitution.

        Returns
        -------
        List[Block]
        """
        if should_use_regex(regex, to_replace):
            return self._replace_regex(
                to_replace,
                value,
                inplace=inplace,
                mask=mask,
            )
        else:
            if value is None:
                # gh-45601, gh-45836, gh-46634
                if mask.any():
                    has_ref = self.refs.has_reference()
                    nb = self.astype(np.dtype(object), copy=False, using_cow=using_cow)
                    if (nb is self or using_cow) and not inplace:
                        nb = nb.copy()
                    elif inplace and has_ref and nb.refs.has_reference() and using_cow:
                        # no copy in astype and we had refs before
                        nb = nb.copy()
                    putmask_inplace(nb.values, mask, value)
                    return [nb]
                if using_cow:
                    return [self]
                return [self] if inplace else [self.copy()]
            return self.replace(
                to_replace=to_replace,
                value=value,
                inplace=inplace,
                mask=mask,
                using_cow=using_cow,
            )

    # ---------------------------------------------------------------------
    # 2D Methods - Shared by NumpyBlock and NDArrayBackedExtensionBlock
    # but not ExtensionBlock

    def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray:
        """
        For compatibility with 1D-only ExtensionArrays.
        """
        return arg

    def _unwrap_setitem_indexer(self, indexer):
        """
        For compatibility with 1D-only ExtensionArrays.
        """
        return indexer

    # NB: this cannot be made cache_readonly because in mgr.set_values we pin
    # new .values that can have different shape GH#42631
    @property
    def shape(self) -> Shape:
        return self.values.shape

    def iget(self, i: int | tuple[int, int] | tuple[slice, int]) -> np.ndarray:
        # In the case where we have a tuple[slice, int], the slice will always
        # be slice(None)
        # Note: only reached with self.ndim == 2
        # Invalid index type "Union[int, Tuple[int, int], Tuple[slice, int]]"
        # for "Union[ndarray[Any, Any], ExtensionArray]"; expected type
        # "Union[int, integer[Any]]"
        return self.values[i]  # type: ignore[index]

    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ArrayLike:
        """return a slice of my values"""

        return self.values[slicer]

    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
        """
        Modify block values in-place with new item value.

        If copy=True, first copy the underlying values in place before modifying
        (for Copy-on-Write).

        Notes
        -----
        `set_inplace` never creates a new array or new Block, whereas `setitem`
        _may_ create a new array and always creates a new Block.

        Caller is responsible for checking values.dtype == self.dtype.
        """
        if copy:
            self.values = self.values.copy()
        self.values[locs] = values

    @final
    def take_nd(
        self,
        indexer: npt.NDArray[np.intp],
        axis: AxisInt,
        new_mgr_locs: BlockPlacement | None = None,
        fill_value=lib.no_default,
    ) -> Block:
        """
        Take values according to indexer and return them as a block.
        """
        values = self.values

        if fill_value is lib.no_default:
            fill_value = self.fill_value
            allow_fill = False
        else:
            allow_fill = True

        # Note: algos.take_nd has upcast logic similar to coerce_to_target_dtype
        new_values = algos.take_nd(
            values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
        )

        # Called from three places in managers, all of which satisfy
        # these assertions
        if isinstance(self, ExtensionBlock):
            # NB: in this case, the 'axis' kwarg will be ignored in the
            # algos.take_nd call above.
            assert not (self.ndim == 1 and new_mgr_locs is None)
        assert not (axis == 0 and new_mgr_locs is None)

        if new_mgr_locs is None:
            new_mgr_locs = self._mgr_locs

        if new_values.dtype != self.dtype:
            return self.make_block(new_values, new_mgr_locs)
        else:
            return self.make_block_same_class(new_values, new_mgr_locs)

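    # A hedged sketch of the fill semantics above, calling the internal helper
    # directly (signature as of pandas 2.x; not public API):
    #
    #     arr = np.array([1.0, 2.0, 3.0])
    #     algos.take_nd(arr, np.array([0, -1]), axis=0,
    #                   allow_fill=True, fill_value=np.nan)
    #     # -> array([ 1., nan]); without allow_fill, -1 means "last element"
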
    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        """
        Return a list of unstacked blocks of self.

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : int
            Only used in ExtensionBlock._unstack
        new_placement : np.ndarray[np.intp]
        needs_masking : np.ndarray[bool]

        Returns
        -------
        blocks : list of Block
            New blocks of unstacked values.
        mask : array-like of bool
            The mask of columns of `blocks` we should keep.
        """
        new_values, mask = unstacker.get_new_values(
            self.values.T, fill_value=fill_value
        )

        mask = mask.any(0)
        # TODO: in all tests we have mask.all(); can we rely on that?

        # Note: these next two lines ensure that
        # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        # which the calling function needs in order to pass verify_integrity=False
        # to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        bp = BlockPlacement(new_placement)
        blocks = [new_block_2d(new_values, placement=bp)]
        return blocks, mask

    # ---------------------------------------------------------------------

    def setitem(self, indexer, value, using_cow: bool = False) -> Block:
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set
        using_cow : bool, default False
            Signals whether Copy-on-Write is used.

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """

        value = self._standardize_fill_value(value)

        values = cast(np.ndarray, self.values)
        if self.ndim == 2:
            values = values.T

        # length checking
        check_setitem_lengths(indexer, value, values)

        if self.dtype != _dtype_obj:
            # GH48933: extract_array would convert a pd.Series value to np.ndarray
            value = extract_array(value, extract_numpy=True)
        try:
            casted = np_can_hold_element(values.dtype, value)
        except LossySetitemError:
            # current dtype cannot store value, coerce to common dtype
            nb = self.coerce_to_target_dtype(value, warn_on_upcast=True)
            return nb.setitem(indexer, value)
        else:
            if self.dtype == _dtype_obj:
                # TODO: avoid having to construct values[indexer]
                vi = values[indexer]
                if lib.is_list_like(vi):
                    # checking lib.is_scalar here fails on
                    # test_iloc_setitem_custom_object
                    casted = setitem_datetimelike_compat(values, len(vi), casted)

            self = self._maybe_copy(using_cow, inplace=True)
            values = cast(np.ndarray, self.values.T)
            if isinstance(casted, np.ndarray) and casted.ndim == 1 and len(casted) == 1:
                # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
                casted = casted[0, ...]
            try:
                values[indexer] = casted
            except (TypeError, ValueError) as err:
                if is_list_like(casted):
                    raise ValueError(
                        "setting an array element with a sequence."
                    ) from err
                raise
        return self

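    # A hedged trace of the two setitem outcomes (pandas 2.x):
    #
    #     ser = pd.Series([1, 2, 3])
    #     ser.iloc[0] = 10    # np_can_hold_element succeeds -> set in place
    #     ser.iloc[0] = 1.5   # LossySetitemError -> coerce_to_target_dtype
    #                         # upcasts to float64 (with a FutureWarning),
    #                         # then the setitem is retried on the new block
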
    def putmask(
        self, mask, new, using_cow: bool = False, already_warned=None
    ) -> list[Block]:
        """
        putmask the data to the block; it is possible that we may create a
        new dtype of block.

        Return the resulting block(s).

        Parameters
        ----------
        mask : np.ndarray[bool], SparseArray[bool], or BooleanArray
        new : a ndarray/object
        using_cow : bool, default False

        Returns
        -------
        List[Block]
        """
        orig_mask = mask
        values = cast(np.ndarray, self.values)
        mask, noop = validate_putmask(values.T, mask)
        assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))

        if new is lib.no_default:
            new = self.fill_value

        new = self._standardize_fill_value(new)
        new = extract_array(new, extract_numpy=True)

        if noop:
            if using_cow:
                return [self.copy(deep=False)]
            return [self]

        if (
            warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            already_warned.warned_already = True

        try:
            casted = np_can_hold_element(values.dtype, new)

            self = self._maybe_copy(using_cow, inplace=True)
            values = cast(np.ndarray, self.values)

            putmask_without_repeat(values.T, mask, casted)
            return [self]
        except LossySetitemError:
            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                if not is_list_like(new):
                    # using just new[indexer] can't save us the need to cast
                    return self.coerce_to_target_dtype(
                        new, warn_on_upcast=True
                    ).putmask(mask, new)
                else:
                    indexer = mask.nonzero()[0]
                    nb = self.setitem(indexer, new[indexer], using_cow=using_cow)
                    return [nb]

            else:
                is_array = isinstance(new, np.ndarray)

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = new
                    if is_array:
                        # we have a different value per-column
                        n = new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n, using_cow=using_cow)
                    res_blocks.extend(rbs)
                return res_blocks

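    # A hedged public-API mirror of putmask (pandas 2.x): boolean-mask
    # assignment routes through this method.
    #
    #     ser = pd.Series([1, 2, 3])
    #     ser[ser > 1] = 0      # values fit int64 -> putmask_without_repeat
    #     ser2 = pd.Series([1, 2, 3])
    #     ser2[ser2 > 1] = "x"  # LossySetitemError -> upcast to object first
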
    def where(
        self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False
    ) -> list[Block]:
        """
        Evaluate the block; return the resulting block(s).

        Parameters
        ----------
        other : a ndarray/object
        cond : np.ndarray[bool], SparseArray[bool], or BooleanArray
        _downcast : str or bool, default "infer"
            Private because we only specify it when calling from fillna.

        Returns
        -------
        List[Block]
        """
        assert cond.ndim == self.ndim
        assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame))

        transpose = self.ndim == 2

        cond = extract_bool_array(cond)

        # EABlocks override where
        values = cast(np.ndarray, self.values)
        orig_other = other
        if transpose:
            values = values.T

        icond, noop = validate_putmask(values, ~cond)
        if noop:
            # GH-39595: Always return a copy; short-circuit up/downcasting
            if using_cow:
                return [self.copy(deep=False)]
            return [self.copy()]

        if other is lib.no_default:
            other = self.fill_value

        other = self._standardize_fill_value(other)

        try:
            # try/except here is equivalent to a self._can_hold_element check,
            # but this gets us back 'casted' which we will reuse below;
            # without using 'casted', expressions.where may do unwanted upcasts.
            casted = np_can_hold_element(values.dtype, other)
        except (ValueError, TypeError, LossySetitemError):
            # we cannot coerce, return a compat dtype

            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                block = self.coerce_to_target_dtype(other)
                blocks = block.where(orig_other, cond, using_cow=using_cow)
                return self._maybe_downcast(
                    blocks, downcast=_downcast, using_cow=using_cow, caller="where"
                )

            else:
                # since _maybe_downcast would split blocks anyway, we
                # can avoid some potential upcast/downcast by splitting
                # on the front end.
                is_array = isinstance(other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    oth = other
                    if is_array:
                        # we have a different value per-column
                        oth = other[:, i : i + 1]

                    submask = cond[:, i : i + 1]
                    rbs = nb.where(
                        oth, submask, _downcast=_downcast, using_cow=using_cow
                    )
                    res_blocks.extend(rbs)
                return res_blocks

        else:
            other = casted
            alt = setitem_datetimelike_compat(values, icond.sum(), other)
            if alt is not other:
                if is_list_like(other) and len(other) < len(values):
                    # call np.where with other to get the appropriate ValueError
                    np.where(~icond, values, other)
                    raise NotImplementedError(
                        "This should not be reached; call to np.where above is "
                        "expected to raise ValueError. Please report a bug at "
                        "github.com/pandas-dev/pandas"
                    )
                result = values.copy()
                np.putmask(result, icond, alt)
            else:
                # By the time we get here, we should have all Series/Index
                # args extracted to ndarray
                if (
                    is_list_like(other)
                    and not isinstance(other, np.ndarray)
                    and len(other) == self.shape[-1]
                ):
                    # If we don't do this broadcasting here, then expressions.where
                    # will broadcast a 1D other to be row-like instead of
                    # column-like.
                    other = np.array(other).reshape(values.shape)
                    # If lengths don't match (or len(other)==1), we will raise
                    # inside expressions.where, see test_series_where

                # Note: expressions.where may upcast.
                result = expressions.where(~icond, values, other)
                # The np_can_hold_element check _should_ ensure that we always
                # have result.dtype == self.dtype here.

        if transpose:
            result = result.T

        return [self.make_block(result)]

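    # A hedged public-API mirror of ``where`` (pandas 2.x): cond marks the
    # values to keep; other fills the rest.
    #
    #     ser = pd.Series([1, 2, 3])
    #     ser.where(ser > 1, 0)     # -> [0, 2, 3]; 0 fits int64, no upcast
    #     ser.where(ser > 1, 0.5)   # 0.5 cannot be held -> upcast to float64
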
    def fillna(
        self,
        value,
        limit: int | None = None,
        inplace: bool = False,
        downcast=None,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        """
        fillna on the block with the value. If we fail, then convert the block
        to hold objects instead and try again.
        """
        # Caller is responsible for validating limit; if int it is strictly positive
        inplace = validate_bool_kwarg(inplace, "inplace")

        if not self._can_hold_na:
            # can short-circuit the isna call
            noop = True
        else:
            mask = isna(self.values)
            mask, noop = validate_putmask(self.values, mask)

        if noop:
            # we can't process the value, but nothing to do
            if inplace:
                if using_cow:
                    return [self.copy(deep=False)]
                # Arbitrarily imposing the convention that we ignore downcast
                # on no-op when inplace=True
                return [self]
            else:
                # GH#45423 consistent downcasting on no-ops.
                nb = self.copy(deep=not using_cow)
                nbs = nb._maybe_downcast(
                    [nb], downcast=downcast, using_cow=using_cow, caller="fillna"
                )
                return nbs

        if limit is not None:
            mask[mask.cumsum(self.ndim - 1) > limit] = False

        if inplace:
            nbs = self.putmask(
                mask.T, value, using_cow=using_cow, already_warned=already_warned
            )
        else:
            # without _downcast, we would break
            # test_fillna_dtype_conversion_equiv_replace
            nbs = self.where(value, ~mask.T, _downcast=False)

        # Note: blk._maybe_downcast vs self._maybe_downcast(nbs)
        # makes a difference bc blk may have object dtype, which has
        # different behavior in _maybe_downcast.
        return extend_blocks(
            [
                blk._maybe_downcast(
                    [blk], downcast=downcast, using_cow=using_cow, caller="fillna"
                )
                for blk in nbs
            ]
        )

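    # A hedged illustration of the limit masking above: with limit=1 only the
    # first NA (along the fill axis) is filled, because positions where the
    # running count of NAs exceeds the limit are dropped from the mask.
    #
    #     ser = pd.Series([np.nan, np.nan, 3.0])
    #     ser.fillna(0, limit=1)    # -> [0.0, nan, 3.0]
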
    def pad_or_backfill(
        self,
        *,
        method: FillnaOptions,
        axis: AxisInt = 0,
        inplace: bool = False,
        limit: int | None = None,
        limit_area: Literal["inside", "outside"] | None = None,
        downcast: Literal["infer"] | None = None,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        if not self._can_hold_na:
            # If there are no NAs, then pad/backfill is a no-op
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        copy, refs = self._get_refs_and_copy(using_cow, inplace)

        # Dispatch to the NumpyExtensionArray method.
        # We know self.array_values is a NumpyExtensionArray bc EABlock overrides
        vals = cast(NumpyExtensionArray, self.array_values)
        if axis == 1:
            vals = vals.T
        new_values = vals._pad_or_backfill(
            method=method,
            limit=limit,
            limit_area=limit_area,
            copy=copy,
        )
        if (
            not copy
            and warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            already_warned.warned_already = True
        if axis == 1:
            new_values = new_values.T

        data = extract_array(new_values, extract_numpy=True)

        nb = self.make_block_same_class(data, refs=refs)
        return nb._maybe_downcast([nb], downcast, using_cow, caller="fillna")

    @final
    def interpolate(
        self,
        *,
        method: InterpolateOptions,
        index: Index,
        inplace: bool = False,
        limit: int | None = None,
        limit_direction: Literal["forward", "backward", "both"] = "forward",
        limit_area: Literal["inside", "outside"] | None = None,
        downcast: Literal["infer"] | None = None,
        using_cow: bool = False,
        already_warned=None,
        **kwargs,
    ) -> list[Block]:
        inplace = validate_bool_kwarg(inplace, "inplace")
        # error: Non-overlapping equality check [...]
        if method == "asfreq":  # type: ignore[comparison-overlap]
            # clean_fill_method used to allow this
            missing.clean_fill_method(method)

        if not self._can_hold_na:
            # If there are no NAs, then interpolate is a no-op
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        # TODO(3.0): this case will not be reachable once GH#53638 is enforced
        if self.dtype == _dtype_obj:
            # only deal with floats
            # bc we already checked that can_hold_na, we don't have int dtype here
            # test_interp_basic checks that we make a copy here
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        copy, refs = self._get_refs_and_copy(using_cow, inplace)

        # Dispatch to the EA method.
        new_values = self.array_values.interpolate(
            method=method,
            axis=self.ndim - 1,
            index=index,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            copy=copy,
            **kwargs,
        )
        data = extract_array(new_values, extract_numpy=True)

        if (
            not copy
            and warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            already_warned.warned_already = True

        nb = self.make_block_same_class(data, refs=refs)
        return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate")

    @final
    def diff(self, n: int) -> list[Block]:
        """return block for the diff of the values"""
        # only reached with ndim == 2
        # TODO(EA2D): transpose will be unnecessary with 2D EAs
        new_values = algos.diff(self.values.T, n, axis=0).T
        return [self.make_block(values=new_values)]

    def shift(self, periods: int, fill_value: Any = None) -> list[Block]:
        """shift the block by periods, possibly upcast"""
        # convert integer to float if necessary. need to do a lot more than
        # that, handle boolean etc also
        axis = self.ndim - 1

        # Note: periods is never 0 here, as that is handled at the top of
        # NDFrame.shift. If that ever changes, we can do a check for periods=0
        # and possibly avoid coercing.

        if not lib.is_scalar(fill_value) and self.dtype != _dtype_obj:
            # with object dtype there is nothing to promote, and the user can
            # pass pretty much any weird fill_value they like
            # see test_shift_object_non_scalar_fill
            raise ValueError("fill_value must be a scalar")

        fill_value = self._standardize_fill_value(fill_value)

        try:
            # error: Argument 1 to "np_can_hold_element" has incompatible type
            # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
            casted = np_can_hold_element(
                self.dtype, fill_value  # type: ignore[arg-type]
            )
        except LossySetitemError:
            nb = self.coerce_to_target_dtype(fill_value)
            return nb.shift(periods, fill_value=fill_value)

        else:
            values = cast(np.ndarray, self.values)
            new_values = shift(values, periods, axis, casted)
            return [self.make_block_same_class(new_values)]

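    # A hedged illustration of the shift upcast path (pandas 2.x): the default
    # fill_value is standardized to NaN, which int64 cannot hold, so the block
    # is coerced before shifting.
    #
    #     ser = pd.Series([1, 2, 3])
    #     ser.shift(1)                 # -> [nan, 1.0, 2.0], dtype float64
    #     ser.shift(1, fill_value=0)   # 0 fits int64 -> dtype stays int64
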
    @final
    def quantile(
        self,
        qs: Index,  # with dtype float64
        interpolation: QuantileInterpolation = "linear",
    ) -> Block:
        """
        Compute the quantiles of the block's values.

        Parameters
        ----------
        qs : Index
            The quantiles to be computed in float64.
        interpolation : str, default 'linear'
            Type of interpolation.

        Returns
        -------
        Block
        """
        # We should always have ndim == 2 because Series dispatches to DataFrame
        assert self.ndim == 2
        assert is_list_like(qs)  # caller is responsible for this

        result = quantile_compat(self.values, np.asarray(qs._values), interpolation)
        # ensure_block_shape needed for cases where we start with EA and result
        # is ndarray, e.g. IntegerArray, SparseArray
        result = ensure_block_shape(result, ndim=2)
        return new_block_2d(result, placement=self._mgr_locs)

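    # A hedged public-API mirror (pandas 2.x): DataFrame.quantile feeds each
    # block through the method above, producing one row per requested quantile.
    #
    #     df = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0]})
    #     df.quantile([0.25, 0.5])   # -> a: 1.75 and 2.5 (linear interpolation)
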
1897 @final
1898 def round(self, decimals: int, using_cow: bool = False) -> Self:
1899 """
1900 Rounds the values.
1901 If the block is not of an integer or float dtype, nothing happens.
1902 This is consistent with DataFrame.round behavivor.
1903 (Note: Series.round would raise)
1904
1905 Parameters
1906 ----------
1907 decimals: int,
1908 Number of decimal places to round to.
1909 Caller is responsible for validating this
1910 using_cow: bool,
1911 Whether Copy on Write is enabled right now
1912 """
1913 if not self.is_numeric or self.is_bool:
1914 return self.copy(deep=not using_cow)
1915 refs = None
1916 # TODO: round only defined on BaseMaskedArray
1917 # Series also does this, so would need to fix both places
1918 # error: Item "ExtensionArray" of "Union[ndarray[Any, Any], ExtensionArray]"
1919 # has no attribute "round"
1920 values = self.values.round(decimals) # type: ignore[union-attr]
1921 if values is self.values:
1922 if not using_cow:
1923 # Normally would need to do this before, but
1924 # numpy only returns same array when round operation
1925 # is no-op
1926 # https://github.com/numpy/numpy/blob/486878b37fc7439a3b2b87747f50db9b62fea8eb/numpy/core/src/multiarray/calculation.c#L625-L636
1927 values = values.copy()
1928 else:
1929 refs = self.refs
1930 return self.make_block_same_class(values, refs=refs)
1931
1932 # ---------------------------------------------------------------------
1933 # Abstract Methods Overridden By EABackedBlock and NumpyBlock
1934
1935 def delete(self, loc) -> list[Block]:
1936 """Deletes the locs from the block.
1937
1938 We split the block to avoid copying the underlying data. We create new
1939 blocks for every connected segment of the initial block that is not deleted.
1940 The new blocks point to the initial array.
1941 """
1942 if not is_list_like(loc):
1943 loc = [loc]
1944
1945 if self.ndim == 1:
1946 values = cast(np.ndarray, self.values)
1947 values = np.delete(values, loc)
1948 mgr_locs = self._mgr_locs.delete(loc)
1949 return [type(self)(values, placement=mgr_locs, ndim=self.ndim)]
1950
1951 if np.max(loc) >= self.values.shape[0]:
1952 raise IndexError
1953
1954 # Add one out-of-bounds indexer as maximum to collect
1955 # all columns after our last indexer if any
1956 loc = np.concatenate([loc, [self.values.shape[0]]])
1957 mgr_locs_arr = self._mgr_locs.as_array
1958 new_blocks: list[Block] = []
1959
1960 previous_loc = -1
1961 # TODO(CoW): This is tricky, if parent block goes out of scope
1962 # all split blocks are referencing each other even though they
1963 # don't share data
1964 refs = self.refs if self.refs.has_reference() else None
1965 for idx in loc:
1966 if idx == previous_loc + 1:
1967 # There is no column between current and last idx
1968 pass
1969 else:
1970 # No overload variant of "__getitem__" of "ExtensionArray" matches
1971 # argument type "Tuple[slice, slice]"
1972 values = self.values[previous_loc + 1 : idx, :] # type: ignore[call-overload]
1973 locs = mgr_locs_arr[previous_loc + 1 : idx]
1974 nb = type(self)(
1975 values, placement=BlockPlacement(locs), ndim=self.ndim, refs=refs
1976 )
1977 new_blocks.append(nb)
1978
1979 previous_loc = idx
1980
1981 return new_blocks
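
    # Rough worked example of the split above (hypothetical shapes): deleting
    # loc=[1] from a block covering rows 0..3 appends the out-of-bounds
    # sentinel 4, giving loc=[1, 4]; the loop then emits two view-blocks over
    # values[0:1] and values[2:4], so no data is copied.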
1982
1983 @property
1984 def is_view(self) -> bool:
        """Return a boolean indicating whether I am possibly a view."""
1986 raise AbstractMethodError(self)
1987
1988 @property
1989 def array_values(self) -> ExtensionArray:
1990 """
1991 The array that Series.array returns. Always an ExtensionArray.
1992 """
1993 raise AbstractMethodError(self)
1994
1995 def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
1996 """
        Return an internal format, currently just the ndarray.
        This is often overridden to handle to_dense-like operations.
1999 """
2000 raise AbstractMethodError(self)
2001
2002
2003class EABackedBlock(Block):
2004 """
2005 Mixin for Block subclasses backed by ExtensionArray.
2006 """
2007
2008 values: ExtensionArray
2009
2010 @final
2011 def shift(self, periods: int, fill_value: Any = None) -> list[Block]:
2012 """
2013 Shift the block by `periods`.
2014
2015 Dispatches to underlying ExtensionArray and re-boxes in an
2016 ExtensionBlock.
2017 """
2018 # Transpose since EA.shift is always along axis=0, while we want to shift
2019 # along rows.
2020 new_values = self.values.T.shift(periods=periods, fill_value=fill_value).T
2021 return [self.make_block_same_class(new_values)]
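
    # Illustrative sketch of the dispatch above (assumed public EA behavior):
    #
    #   >>> pd.array([1, 2], dtype="Int64").shift(1)
    #   <IntegerArray>
    #   [<NA>, 1]
    #   Length: 2, dtype: Int64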
2022
2023 @final
2024 def setitem(self, indexer, value, using_cow: bool = False):
2025 """
2026 Attempt self.values[indexer] = value, possibly creating a new array.
2027
2028 This differs from Block.setitem by not allowing setitem to change
2029 the dtype of the Block.
2030
2031 Parameters
2032 ----------
2033 indexer : tuple, list-like, array-like, slice, int
2034 The subset of self.values to set
2035 value : object
2036 The value being set
        using_cow : bool, default False
            Whether Copy-on-Write is in use.
2039
2040 Returns
2041 -------
2042 Block
2043
2044 Notes
2045 -----
2046 `indexer` is a direct slice/positional indexer. `value` must
2047 be a compatible shape.
2048 """
2049 orig_indexer = indexer
2050 orig_value = value
2051
2052 indexer = self._unwrap_setitem_indexer(indexer)
2053 value = self._maybe_squeeze_arg(value)
2054
2055 values = self.values
2056 if values.ndim == 2:
2057 # TODO(GH#45419): string[pyarrow] tests break if we transpose
2058 # unconditionally
2059 values = values.T
2060 check_setitem_lengths(indexer, value, values)
2061
2062 try:
2063 values[indexer] = value
2064 except (ValueError, TypeError):
2065 if isinstance(self.dtype, IntervalDtype):
2066 # see TestSetitemFloatIntervalWithIntIntervalValues
2067 nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
2068 return nb.setitem(orig_indexer, orig_value)
2069
2070 elif isinstance(self, NDArrayBackedExtensionBlock):
2071 nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
2072 return nb.setitem(orig_indexer, orig_value)
2073
2074 else:
2075 raise
2076
2077 else:
2078 return self
2079
2080 @final
2081 def where(
2082 self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False
2083 ) -> list[Block]:
2084 # _downcast private bc we only specify it when calling from fillna
2085 arr = self.values.T
2086
2087 cond = extract_bool_array(cond)
2088
2089 orig_other = other
2090 orig_cond = cond
2091 other = self._maybe_squeeze_arg(other)
2092 cond = self._maybe_squeeze_arg(cond)
2093
2094 if other is lib.no_default:
2095 other = self.fill_value
2096
2097 icond, noop = validate_putmask(arr, ~cond)
2098 if noop:
2099 # GH#44181, GH#45135
2100 # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast
2101 if using_cow:
2102 return [self.copy(deep=False)]
2103 return [self.copy()]
2104
2105 try:
2106 res_values = arr._where(cond, other).T
2107 except (ValueError, TypeError):
2108 if self.ndim == 1 or self.shape[0] == 1:
2109 if isinstance(self.dtype, IntervalDtype):
2110 # TestSetitemFloatIntervalWithIntIntervalValues
2111 blk = self.coerce_to_target_dtype(orig_other)
2112 nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
2113 return self._maybe_downcast(
2114 nbs, downcast=_downcast, using_cow=using_cow, caller="where"
2115 )
2116
2117 elif isinstance(self, NDArrayBackedExtensionBlock):
2118 # NB: not (yet) the same as
2119 # isinstance(values, NDArrayBackedExtensionArray)
2120 blk = self.coerce_to_target_dtype(orig_other)
2121 nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
2122 return self._maybe_downcast(
2123 nbs, downcast=_downcast, using_cow=using_cow, caller="where"
2124 )
2125
2126 else:
2127 raise
2128
2129 else:
2130 # Same pattern we use in Block.putmask
2131 is_array = isinstance(orig_other, (np.ndarray, ExtensionArray))
2132
2133 res_blocks = []
2134 nbs = self._split()
2135 for i, nb in enumerate(nbs):
2136 n = orig_other
2137 if is_array:
2138 # we have a different value per-column
2139 n = orig_other[:, i : i + 1]
2140
2141 submask = orig_cond[:, i : i + 1]
2142 rbs = nb.where(n, submask, using_cow=using_cow)
2143 res_blocks.extend(rbs)
2144 return res_blocks
2145
2146 nb = self.make_block_same_class(res_values)
2147 return [nb]
2148
2149 @final
2150 def putmask(
2151 self, mask, new, using_cow: bool = False, already_warned=None
2152 ) -> list[Block]:
2153 """
2154 See Block.putmask.__doc__
2155 """
2156 mask = extract_bool_array(mask)
2157 if new is lib.no_default:
2158 new = self.fill_value
2159
2160 orig_new = new
2161 orig_mask = mask
2162 new = self._maybe_squeeze_arg(new)
2163 mask = self._maybe_squeeze_arg(mask)
2164
2165 if not mask.any():
2166 if using_cow:
2167 return [self.copy(deep=False)]
2168 return [self]
2169
2170 if (
2171 warn_copy_on_write()
2172 and already_warned is not None
2173 and not already_warned.warned_already
2174 ):
2175 if self.refs.has_reference():
2176 warnings.warn(
2177 COW_WARNING_GENERAL_MSG,
2178 FutureWarning,
2179 stacklevel=find_stack_level(),
2180 )
2181 already_warned.warned_already = True
2182
2183 self = self._maybe_copy(using_cow, inplace=True)
2184 values = self.values
2185 if values.ndim == 2:
2186 values = values.T
2187
2188 try:
2189 # Caller is responsible for ensuring matching lengths
2190 values._putmask(mask, new)
2191 except (TypeError, ValueError):
2192 if self.ndim == 1 or self.shape[0] == 1:
2193 if isinstance(self.dtype, IntervalDtype):
2194 # Discussion about what we want to support in the general
2195 # case GH#39584
2196 blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
2197 return blk.putmask(orig_mask, orig_new)
2198
2199 elif isinstance(self, NDArrayBackedExtensionBlock):
2200 # NB: not (yet) the same as
2201 # isinstance(values, NDArrayBackedExtensionArray)
2202 blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
2203 return blk.putmask(orig_mask, orig_new)
2204
2205 else:
2206 raise
2207
2208 else:
2209 # Same pattern we use in Block.putmask
2210 is_array = isinstance(orig_new, (np.ndarray, ExtensionArray))
2211
2212 res_blocks = []
2213 nbs = self._split()
2214 for i, nb in enumerate(nbs):
2215 n = orig_new
2216 if is_array:
2217 # we have a different value per-column
2218 n = orig_new[:, i : i + 1]
2219
2220 submask = orig_mask[:, i : i + 1]
2221 rbs = nb.putmask(submask, n)
2222 res_blocks.extend(rbs)
2223 return res_blocks
2224
2225 return [self]
2226
2227 @final
2228 def delete(self, loc) -> list[Block]:
2229 # This will be unnecessary if/when __array_function__ is implemented
2230 if self.ndim == 1:
2231 values = self.values.delete(loc)
2232 mgr_locs = self._mgr_locs.delete(loc)
2233 return [type(self)(values, placement=mgr_locs, ndim=self.ndim)]
2234 elif self.values.ndim == 1:
2235 # We get here through to_stata
2236 return []
2237 return super().delete(loc)
2238
2239 @final
2240 @cache_readonly
2241 def array_values(self) -> ExtensionArray:
2242 return self.values
2243
2244 @final
2245 def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
2246 """
        Return the values boxed as objects (e.g. Timestamps/Timedeltas) when object dtype is requested.
2248 """
2249 values: ArrayLike = self.values
2250 if dtype == _dtype_obj:
2251 values = values.astype(object)
2252 # TODO(EA2D): reshape not needed with 2D EAs
2253 return np.asarray(values).reshape(self.shape)
2254
2255 @final
2256 def pad_or_backfill(
2257 self,
2258 *,
2259 method: FillnaOptions,
2260 axis: AxisInt = 0,
2261 inplace: bool = False,
2262 limit: int | None = None,
2263 limit_area: Literal["inside", "outside"] | None = None,
2264 downcast: Literal["infer"] | None = None,
2265 using_cow: bool = False,
2266 already_warned=None,
2267 ) -> list[Block]:
2268 values = self.values
2269
2270 kwargs: dict[str, Any] = {"method": method, "limit": limit}
2271 if "limit_area" in inspect.signature(values._pad_or_backfill).parameters:
2272 kwargs["limit_area"] = limit_area
2273 elif limit_area is not None:
2274 raise NotImplementedError(
2275 f"{type(values).__name__} does not implement limit_area "
                "(added in pandas 2.2). 3rd-party ExtensionArray authors "
2277 "need to add this argument to _pad_or_backfill."
2278 )
2279
2280 if values.ndim == 2 and axis == 1:
2281 # NDArrayBackedExtensionArray.fillna assumes axis=0
2282 new_values = values.T._pad_or_backfill(**kwargs).T
2283 else:
2284 new_values = values._pad_or_backfill(**kwargs)
2285 return [self.make_block_same_class(new_values)]
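
    # Illustrative sketch of the signature probe above, with a hypothetical
    # 3rd-party EA:
    #
    #   class LegacyArray(ExtensionArray):
    #       def _pad_or_backfill(self, *, method, limit=None, copy=True): ...
    #
    # "limit_area" is missing from the signature, so kwargs omits it, and an
    # explicit limit_area from the caller raises NotImplementedError instead.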
2286
2287
2288class ExtensionBlock(EABackedBlock):
2289 """
2290 Block for holding extension types.
2291
2292 Notes
2293 -----
2294 This holds all 3rd-party extension array types. It's also the immediate
2295 parent class for our internal extension types' blocks.
2296
2297 ExtensionArrays are limited to 1-D.
2298 """
2299
2300 values: ExtensionArray
2301
2302 def fillna(
2303 self,
2304 value,
2305 limit: int | None = None,
2306 inplace: bool = False,
2307 downcast=None,
2308 using_cow: bool = False,
2309 already_warned=None,
2310 ) -> list[Block]:
2311 if isinstance(self.dtype, IntervalDtype):
2312 # Block.fillna handles coercion (test_fillna_interval)
2313 return super().fillna(
2314 value=value,
2315 limit=limit,
2316 inplace=inplace,
2317 downcast=downcast,
2318 using_cow=using_cow,
2319 already_warned=already_warned,
2320 )
2321 if using_cow and self._can_hold_na and not self.values._hasna:
2322 refs = self.refs
2323 new_values = self.values
2324 else:
2325 copy, refs = self._get_refs_and_copy(using_cow, inplace)
2326
2327 try:
2328 new_values = self.values.fillna(
2329 value=value, method=None, limit=limit, copy=copy
2330 )
2331 except TypeError:
2332 # 3rd party EA that has not implemented copy keyword yet
2333 refs = None
2334 new_values = self.values.fillna(value=value, method=None, limit=limit)
2335 # issue the warning *after* retrying, in case the TypeError
2336 # was caused by an invalid fill_value
2337 warnings.warn(
2338 # GH#53278
2339 "ExtensionArray.fillna added a 'copy' keyword in pandas "
2340 "2.1.0. In a future version, ExtensionArray subclasses will "
2341 "need to implement this keyword or an exception will be "
2342 "raised. In the interim, the keyword is ignored by "
2343 f"{type(self.values).__name__}.",
2344 DeprecationWarning,
2345 stacklevel=find_stack_level(),
2346 )
2347 else:
2348 if (
2349 not copy
2350 and warn_copy_on_write()
2351 and already_warned is not None
2352 and not already_warned.warned_already
2353 ):
2354 if self.refs.has_reference():
2355 warnings.warn(
2356 COW_WARNING_GENERAL_MSG,
2357 FutureWarning,
2358 stacklevel=find_stack_level(),
2359 )
2360 already_warned.warned_already = True
2361
2362 nb = self.make_block_same_class(new_values, refs=refs)
2363 return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna")
2364
2365 @cache_readonly
2366 def shape(self) -> Shape:
2367 # TODO(EA2D): override unnecessary with 2D EAs
2368 if self.ndim == 1:
2369 return (len(self.values),)
2370 return len(self._mgr_locs), len(self.values)
2371
2372 def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
2373 # In the case where we have a tuple[slice, int], the slice will always
2374 # be slice(None)
2375 # We _could_ make the annotation more specific, but mypy would
2376 # complain about override mismatch:
2377 # Literal[0] | tuple[Literal[0], int] | tuple[slice, int]
2378
2379 # Note: only reached with self.ndim == 2
2380
2381 if isinstance(i, tuple):
2382 # TODO(EA2D): unnecessary with 2D EAs
2383 col, loc = i
2384 if not com.is_null_slice(col) and col != 0:
2385 raise IndexError(f"{self} only contains one item")
2386 if isinstance(col, slice):
2387 # the is_null_slice check above assures that col is slice(None)
2388 # so what we want is a view on all our columns and row loc
2389 if loc < 0:
2390 loc += len(self.values)
2391 # Note: loc:loc+1 vs [[loc]] makes a difference when called
2392 # from fast_xs because we want to get a view back.
2393 return self.values[loc : loc + 1]
2394 return self.values[loc]
2395 else:
2396 if i != 0:
2397 raise IndexError(f"{self} only contains one item")
2398 return self.values
2399
2400 def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
2401 # When an ndarray, we should have locs.tolist() == [0]
2402 # When a BlockPlacement we should have list(locs) == [0]
2403 if copy:
2404 self.values = self.values.copy()
2405 self.values[:] = values
2406
2407 def _maybe_squeeze_arg(self, arg):
2408 """
2409 If necessary, squeeze a (N, 1) ndarray to (N,)
2410 """
2411 # e.g. if we are passed a 2D mask for putmask
2412 if (
2413 isinstance(arg, (np.ndarray, ExtensionArray))
2414 and arg.ndim == self.values.ndim + 1
2415 ):
2416 # TODO(EA2D): unnecessary with 2D EAs
2417 assert arg.shape[1] == 1
2418 # error: No overload variant of "__getitem__" of "ExtensionArray"
2419 # matches argument type "Tuple[slice, int]"
2420 arg = arg[:, 0] # type: ignore[call-overload]
2421 elif isinstance(arg, ABCDataFrame):
2422 # 2022-01-06 only reached for setitem
2423 # TODO: should we avoid getting here with DataFrame?
2424 assert arg.shape[1] == 1
2425 arg = arg._ixs(0, axis=1)._values
2426
2427 return arg
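
    # e.g. a (3, 1) putmask mask np.array([[True], [False], [True]]) is
    # squeezed to shape (3,) so that it aligns with the 1D ExtensionArray.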
2428
2429 def _unwrap_setitem_indexer(self, indexer):
2430 """
2431 Adapt a 2D-indexer to our 1D values.
2432
2433 This is intended for 'setitem', not 'iget' or '_slice'.
2434 """
2435 # TODO: ATM this doesn't work for iget/_slice, can we change that?
2436
2437 if isinstance(indexer, tuple) and len(indexer) == 2:
2438 # TODO(EA2D): not needed with 2D EAs
2439 # Should never have length > 2. Caller is responsible for checking.
            # Length 1 is reached via setitem_single_block and setitem_single_column,
            # each of which passes indexer=(pi,)
2442 if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer):
2443 # GH#44703 went through indexing.maybe_convert_ix
2444 first, second = indexer
2445 if not (
2446 second.size == 1 and (second == 0).all() and first.shape[1] == 1
2447 ):
2448 raise NotImplementedError(
2449 "This should not be reached. Please report a bug at "
2450 "github.com/pandas-dev/pandas/"
2451 )
2452 indexer = first[:, 0]
2453
2454 elif lib.is_integer(indexer[1]) and indexer[1] == 0:
2455 # reached via setitem_single_block passing the whole indexer
2456 indexer = indexer[0]
2457
2458 elif com.is_null_slice(indexer[1]):
2459 indexer = indexer[0]
2460
2461 elif is_list_like(indexer[1]) and indexer[1][0] == 0:
2462 indexer = indexer[0]
2463
2464 else:
2465 raise NotImplementedError(
2466 "This should not be reached. Please report a bug at "
2467 "github.com/pandas-dev/pandas/"
2468 )
2469 return indexer
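
    # Illustrative sketch (assumed shapes): an indexer like
    # (np.array([0, 2]), 0) coming from setitem_single_block is unwrapped to
    # np.array([0, 2]); the trailing 0 merely selects this block's single
    # fake column.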
2470
2471 @property
2472 def is_view(self) -> bool:
2473 """Extension arrays are never treated as views."""
2474 return False
2475
2476 # error: Cannot override writeable attribute with read-only property
2477 @cache_readonly
2478 def is_numeric(self) -> bool: # type: ignore[override]
2479 return self.values.dtype._is_numeric
2480
2481 def _slice(
2482 self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
2483 ) -> ExtensionArray:
2484 """
2485 Return a slice of my values.
2486
2487 Parameters
2488 ----------
2489 slicer : slice, ndarray[int], or ndarray[bool]
2490 Valid (non-reducing) indexer for self.values.
2491
2492 Returns
2493 -------
2494 ExtensionArray
2495 """
        # Note: ndarray[bool] is only reachable via get_rows_with_mask, which
        # is only used for Series, i.e. self.ndim == 1.
2498
2499 # return same dims as we currently have
2500 if self.ndim == 2:
2501 # reached via getitem_block via _slice_take_blocks_ax0
2502 # TODO(EA2D): won't be necessary with 2D EAs
2503
2504 if not isinstance(slicer, slice):
2505 raise AssertionError(
2506 "invalid slicing for a 1-ndim ExtensionArray", slicer
2507 )
2508 # GH#32959 only full-slicers along fake-dim0 are valid
2509 # TODO(EA2D): won't be necessary with 2D EAs
2510 # range(1) instead of self._mgr_locs to avoid exception on [::-1]
2511 # see test_iloc_getitem_slice_negative_step_ea_block
2512 new_locs = range(1)[slicer]
2513 if not len(new_locs):
2514 raise AssertionError(
2515 "invalid slicing for a 1-ndim ExtensionArray", slicer
2516 )
2517 slicer = slice(None)
2518
2519 return self.values[slicer]
2520
2521 @final
2522 def slice_block_rows(self, slicer: slice) -> Self:
2523 """
        Perform a __getitem__-like operation specialized to slicing along the index.
2525 """
2526 # GH#42787 in principle this is equivalent to values[..., slicer], but we don't
2527 # require subclasses of ExtensionArray to support that form (for now).
2528 new_values = self.values[slicer]
2529 return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)
2530
2531 def _unstack(
2532 self,
2533 unstacker,
2534 fill_value,
2535 new_placement: npt.NDArray[np.intp],
2536 needs_masking: npt.NDArray[np.bool_],
2537 ):
2538 # ExtensionArray-safe unstack.
2539 # We override Block._unstack, which unstacks directly on the
2540 # values of the array. For EA-backed blocks, this would require
2541 # converting to a 2-D ndarray of objects.
2542 # Instead, we unstack an ndarray of integer positions, followed by
2543 # a `take` on the actual values.
2544
2545 # Caller is responsible for ensuring self.shape[-1] == len(unstacker.index)
2546 new_values, mask = unstacker.arange_result
2547
2548 # Note: these next two lines ensure that
2549 # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
2550 # which the calling function needs in order to pass verify_integrity=False
2551 # to the BlockManager constructor
2552 new_values = new_values.T[mask]
2553 new_placement = new_placement[mask]
2554
2555 # needs_masking[i] calculated once in BlockManager.unstack tells
2556 # us if there are any -1s in the relevant indices. When False,
2557 # that allows us to go through a faster path in 'take', among
2558 # other things avoiding e.g. Categorical._validate_scalar.
2559 blocks = [
2560 # TODO: could cast to object depending on fill_value?
2561 type(self)(
2562 self.values.take(
2563 indices, allow_fill=needs_masking[i], fill_value=fill_value
2564 ),
2565 BlockPlacement(place),
2566 ndim=2,
2567 )
2568 for i, (indices, place) in enumerate(zip(new_values, new_placement))
2569 ]
2570 return blocks, mask
2571
2572
2573class NumpyBlock(Block):
2574 values: np.ndarray
2575 __slots__ = ()
2576
2577 @property
2578 def is_view(self) -> bool:
        """Return a boolean indicating whether I am possibly a view."""
2580 return self.values.base is not None
2581
2582 @property
2583 def array_values(self) -> ExtensionArray:
2584 return NumpyExtensionArray(self.values)
2585
2586 def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
2587 if dtype == _dtype_obj:
2588 return self.values.astype(_dtype_obj)
2589 return self.values
2590
2591 @cache_readonly
2592 def is_numeric(self) -> bool: # type: ignore[override]
2593 dtype = self.values.dtype
2594 kind = dtype.kind
2595
2596 return kind in "fciub"
2597
2598
2599class NumericBlock(NumpyBlock):
2600 # this Block type is kept for backwards-compatibility
2601 # TODO(3.0): delete and remove deprecation in __init__.py.
2602 __slots__ = ()
2603
2604
2605class ObjectBlock(NumpyBlock):
2606 # this Block type is kept for backwards-compatibility
2607 # TODO(3.0): delete and remove deprecation in __init__.py.
2608 __slots__ = ()
2609
2610
2611class NDArrayBackedExtensionBlock(EABackedBlock):
2612 """
2613 Block backed by an NDArrayBackedExtensionArray
2614 """
2615
2616 values: NDArrayBackedExtensionArray
2617
2618 @property
2619 def is_view(self) -> bool:
        """Return a boolean indicating whether I am possibly a view."""
        # check the base of the ndarray backing the ExtensionArray
2622 return self.values._ndarray.base is not None
2623
2624
2625class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
2626 """Block for datetime64[ns], timedelta64[ns]."""
2627
2628 __slots__ = ()
2629 is_numeric = False
2630 values: DatetimeArray | TimedeltaArray
2631
2632
2633class DatetimeTZBlock(DatetimeLikeBlock):
2634 """implement a datetime64 block with a tz attribute"""
2635
2636 values: DatetimeArray
2637
2638 __slots__ = ()
2639
2640
2641# -----------------------------------------------------------------
2642# Constructor Helpers
2643
2644
2645def maybe_coerce_values(values: ArrayLike) -> ArrayLike:
2646 """
2647 Input validation for values passed to __init__. Ensure that
2648 any datetime64/timedelta64 dtypes are in nanoseconds. Ensure
2649 that we do not have string dtypes.
2650
2651 Parameters
2652 ----------
2653 values : np.ndarray or ExtensionArray
2654
2655 Returns
2656 -------
2657 values : np.ndarray or ExtensionArray
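
    Examples
    --------
    A rough illustration (internal API, subject to change):

    >>> maybe_coerce_values(np.array(["a", "b"]))
    array(['a', 'b'], dtype=object)
    >>> type(maybe_coerce_values(np.arange(2).astype("M8[ns]"))).__name__
    'DatetimeArray'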
2658 """
2659 # Caller is responsible for ensuring NumpyExtensionArray is already extracted.
2660
2661 if isinstance(values, np.ndarray):
2662 values = ensure_wrapped_if_datetimelike(values)
2663
2664 if issubclass(values.dtype.type, str):
2665 values = np.array(values, dtype=object)
2666
2667 if isinstance(values, (DatetimeArray, TimedeltaArray)) and values.freq is not None:
2668 # freq is only stored in DatetimeIndex/TimedeltaIndex, not in Series/DataFrame
2669 values = values._with_freq(None)
2670
2671 return values
2672
2673
2674def get_block_type(dtype: DtypeObj) -> type[Block]:
2675 """
    Find the appropriate Block subclass to use for the given dtype.
2677
2678 Parameters
2679 ----------
2680 dtype : numpy or pandas dtype
2681
2682 Returns
2683 -------
2684 cls : class, subclass of Block
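
    Examples
    --------
    A rough illustration (internal API, subject to change):

    >>> get_block_type(np.dtype("M8[ns]")).__name__
    'DatetimeLikeBlock'
    >>> get_block_type(np.dtype("float64")).__name__
    'NumpyBlock'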
2685 """
2686 if isinstance(dtype, DatetimeTZDtype):
2687 return DatetimeTZBlock
2688 elif isinstance(dtype, PeriodDtype):
2689 return NDArrayBackedExtensionBlock
2690 elif isinstance(dtype, ExtensionDtype):
2691 # Note: need to be sure NumpyExtensionArray is unwrapped before we get here
2692 return ExtensionBlock
2693
2694 # We use kind checks because it is much more performant
2695 # than is_foo_dtype
2696 kind = dtype.kind
2697 if kind in "Mm":
2698 return DatetimeLikeBlock
2699
2700 return NumpyBlock
2701
2702
2703def new_block_2d(
2704 values: ArrayLike, placement: BlockPlacement, refs: BlockValuesRefs | None = None
2705):
2706 # new_block specialized to case with
2707 # ndim=2
2708 # isinstance(placement, BlockPlacement)
2709 # check_ndim/ensure_block_shape already checked
2710 klass = get_block_type(values.dtype)
2711
2712 values = maybe_coerce_values(values)
2713 return klass(values, ndim=2, placement=placement, refs=refs)
2714
2715
2716def new_block(
2717 values,
2718 placement: BlockPlacement,
2719 *,
2720 ndim: int,
2721 refs: BlockValuesRefs | None = None,
2722) -> Block:
2723 # caller is responsible for ensuring:
2724 # - values is NOT a NumpyExtensionArray
2725 # - check_ndim/ensure_block_shape already checked
2726 # - maybe_coerce_values already called/unnecessary
2727 klass = get_block_type(values.dtype)
2728 return klass(values, ndim=ndim, placement=placement, refs=refs)
2729
2730
2731def check_ndim(values, placement: BlockPlacement, ndim: int) -> None:
2732 """
2733 ndim inference and validation.
2734
2735 Validates that values.ndim and ndim are consistent.
2736 Validates that len(values) and len(placement) are consistent.
2737
2738 Parameters
2739 ----------
2740 values : array-like
2741 placement : BlockPlacement
2742 ndim : int
2743
2744 Raises
2745 ------
    ValueError : if the number of dimensions does not match
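
    Examples
    --------
    A rough illustration (internal API, subject to change):

    >>> check_ndim(np.array([[1, 2, 3]]), BlockPlacement(slice(0, 1)), ndim=2)
    >>> check_ndim(np.array([1, 2, 3]), BlockPlacement(slice(0, 1)), ndim=2)
    Traceback (most recent call last):
    ...
    ValueError: Wrong number of dimensions. values.ndim != ndim [1 != 2]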
2747 """
2748
2749 if values.ndim > ndim:
2750 # Check for both np.ndarray and ExtensionArray
2751 raise ValueError(
2752 "Wrong number of dimensions. "
2753 f"values.ndim > ndim [{values.ndim} > {ndim}]"
2754 )
2755
2756 if not is_1d_only_ea_dtype(values.dtype):
2757 # TODO(EA2D): special case not needed with 2D EAs
2758 if values.ndim != ndim:
2759 raise ValueError(
2760 "Wrong number of dimensions. "
2761 f"values.ndim != ndim [{values.ndim} != {ndim}]"
2762 )
2763 if len(placement) != len(values):
2764 raise ValueError(
2765 f"Wrong number of items passed {len(values)}, "
2766 f"placement implies {len(placement)}"
2767 )
2768 elif ndim == 2 and len(placement) != 1:
2769 # TODO(EA2D): special case unnecessary with 2D EAs
2770 raise ValueError("need to split")
2771
2772
2773def extract_pandas_array(
2774 values: ArrayLike, dtype: DtypeObj | None, ndim: int
2775) -> tuple[ArrayLike, DtypeObj | None]:
2776 """
2777 Ensure that we don't allow NumpyExtensionArray / NumpyEADtype in internals.
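
    A rough illustration (internal API, subject to change):

    >>> arr = NumpyExtensionArray(np.array([1, 2]))
    >>> vals, _ = extract_pandas_array(arr, None, ndim=1)
    >>> type(vals).__name__
    'ndarray'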
2778 """
2779 # For now, blocks should be backed by ndarrays when possible.
2780 if isinstance(values, ABCNumpyExtensionArray):
2781 values = values.to_numpy()
2782 if ndim and ndim > 1:
2783 # TODO(EA2D): special case not needed with 2D EAs
2784 values = np.atleast_2d(values)
2785
2786 if isinstance(dtype, NumpyEADtype):
2787 dtype = dtype.numpy_dtype
2788
2789 return values, dtype
2790
2791
2792# -----------------------------------------------------------------
2793
2794
2795def extend_blocks(result, blocks=None) -> list[Block]:
    """Return a new extended list of blocks, given the result."""
2797 if blocks is None:
2798 blocks = []
2799 if isinstance(result, list):
2800 for r in result:
2801 if isinstance(r, list):
2802 blocks.extend(r)
2803 else:
2804 blocks.append(r)
2805 else:
2806 assert isinstance(result, Block), type(result)
2807 blocks.append(result)
2808 return blocks
2809
2810
2811def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
2812 """
2813 Reshape if possible to have values.ndim == ndim.
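
    Examples
    --------
    A rough illustration (internal API, subject to change):

    >>> ensure_block_shape(np.array([1, 2, 3]), ndim=2).shape
    (1, 3)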
2814 """
2815
2816 if values.ndim < ndim:
2817 if not is_1d_only_ea_dtype(values.dtype):
2818 # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023
2819 # block.shape is incorrect for "2D" ExtensionArrays
2820 # We can't, and don't need to, reshape.
2821 values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values)
2822 values = values.reshape(1, -1)
2823
2824 return values
2825
2826
2827def external_values(values: ArrayLike) -> ArrayLike:
2828 """
2829 The array that Series.values returns (public attribute).
2830
2831 This has some historical constraints, and is overridden in block
2832 subclasses to return the correct array (e.g. period returns
2833 object ndarray and datetimetz a datetime64[ns] ndarray instead of
2834 proper extension array).
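
    A rough illustration (internal API, subject to change):

    >>> dta = ensure_wrapped_if_datetimelike(
    ...     np.array(["2020-01-01"], dtype="M8[ns]")
    ... )
    >>> external_values(dta).dtype
    dtype('<M8[ns]')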
2835 """
2836 if isinstance(values, (PeriodArray, IntervalArray)):
2837 return values.astype(object)
2838 elif isinstance(values, (DatetimeArray, TimedeltaArray)):
2839 # NB: for datetime64tz this is different from np.asarray(values), since
2840 # that returns an object-dtype ndarray of Timestamps.
2841 # Avoid raising in .astype in casting from dt64tz to dt64
2842 values = values._ndarray
2843
2844 if isinstance(values, np.ndarray) and using_copy_on_write():
2845 values = values.view()
2846 values.flags.writeable = False
2847
2848 # TODO(CoW) we should also mark our ExtensionArrays as read-only
2849
2850 return values