Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/internals/blocks.py: 22%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1041 statements  

1from __future__ import annotations 

2 

3from functools import wraps 

4import re 

5from typing import ( 

6 TYPE_CHECKING, 

7 Any, 

8 Callable, 

9 Iterable, 

10 Sequence, 

11 cast, 

12 final, 

13) 

14 

15import numpy as np 

16 

17from pandas._config import using_copy_on_write 

18 

19from pandas._libs import ( 

20 internals as libinternals, 

21 lib, 

22 writers, 

23) 

24from pandas._libs.internals import ( 

25 BlockPlacement, 

26 BlockValuesRefs, 

27) 

28from pandas._libs.missing import NA 

29from pandas._libs.tslibs import IncompatibleFrequency 

30from pandas._typing import ( 

31 ArrayLike, 

32 AxisInt, 

33 DtypeObj, 

34 F, 

35 FillnaOptions, 

36 IgnoreRaise, 

37 QuantileInterpolation, 

38 Shape, 

39 npt, 

40) 

41from pandas.errors import AbstractMethodError 

42from pandas.util._decorators import cache_readonly 

43from pandas.util._validators import validate_bool_kwarg 

44 

45from pandas.core.dtypes.astype import ( 

46 astype_array_safe, 

47 astype_is_view, 

48) 

49from pandas.core.dtypes.cast import ( 

50 LossySetitemError, 

51 can_hold_element, 

52 find_result_type, 

53 maybe_downcast_to_dtype, 

54 np_can_hold_element, 

55) 

56from pandas.core.dtypes.common import ( 

57 ensure_platform_int, 

58 is_1d_only_ea_dtype, 

59 is_1d_only_ea_obj, 

60 is_dtype_equal, 

61 is_interval_dtype, 

62 is_list_like, 

63 is_sparse, 

64 is_string_dtype, 

65) 

66from pandas.core.dtypes.dtypes import ( 

67 DatetimeTZDtype, 

68 ExtensionDtype, 

69 PandasDtype, 

70 PeriodDtype, 

71) 

72from pandas.core.dtypes.generic import ( 

73 ABCDataFrame, 

74 ABCIndex, 

75 ABCPandasArray, 

76 ABCSeries, 

77) 

78from pandas.core.dtypes.missing import ( 

79 is_valid_na_for_dtype, 

80 isna, 

81 na_value_for_dtype, 

82) 

83 

84from pandas.core import missing 

85import pandas.core.algorithms as algos 

86from pandas.core.array_algos.putmask import ( 

87 extract_bool_array, 

88 putmask_inplace, 

89 putmask_without_repeat, 

90 setitem_datetimelike_compat, 

91 validate_putmask, 

92) 

93from pandas.core.array_algos.quantile import quantile_compat 

94from pandas.core.array_algos.replace import ( 

95 compare_or_regex_search, 

96 replace_regex, 

97 should_use_regex, 

98) 

99from pandas.core.array_algos.transforms import shift 

100from pandas.core.arrays import ( 

101 Categorical, 

102 DatetimeArray, 

103 ExtensionArray, 

104 IntervalArray, 

105 PandasArray, 

106 PeriodArray, 

107 TimedeltaArray, 

108) 

109from pandas.core.arrays.sparse import SparseDtype 

110from pandas.core.base import PandasObject 

111import pandas.core.common as com 

112from pandas.core.computation import expressions 

113from pandas.core.construction import ( 

114 ensure_wrapped_if_datetimelike, 

115 extract_array, 

116) 

117from pandas.core.indexers import check_setitem_lengths 

118 

119if TYPE_CHECKING: 

120 from pandas.core.api import Index 

121 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray 

122 

123# comparison is faster than is_object_dtype 

124_dtype_obj = np.dtype("object") 

125 

126 

def maybe_split(meth: F) -> F:
    """
    If we have a multi-column block, split and operate block-wise. Otherwise
    use the original method.

    Parameters
    ----------
    meth : Block method returning list[Block]

    Returns
    -------
    Wrapped method with the same signature as ``meth``.
    """

    @wraps(meth)
    def newfunc(self, *args, **kwargs) -> list[Block]:
        if self.ndim == 1 or self.shape[0] == 1:
            # Single column (or 1D): apply directly, no splitting needed.
            return meth(self, *args, **kwargs)
        else:
            # Split and operate column-by-column
            return self.split_and_operate(meth, *args, **kwargs)

    return cast(F, newfunc)

142 

143 

class Block(PandasObject):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure

    Index-ignorant; let the container take care of that
    """

    # Underlying data; always of a single (homogeneous) dtype.
    values: np.ndarray | ExtensionArray
    # Dimensionality of the owning manager (1 for Series, 2 for DataFrame).
    ndim: int
    # Copy-on-Write reference tracking for self.values.
    refs: BlockValuesRefs
    __init__: Callable

    __slots__ = ()
    is_numeric = False
    is_object = False
    is_extension = False
    # Whether same-dtype neighbors of this block type may be merged.
    _can_consolidate = True
    _validate_ndim = True

163 

    @final
    @cache_readonly
    def _consolidate_key(self):
        # Blocks sharing this key are candidates for consolidation.
        return self._can_consolidate, self.dtype.name

168 

    @final
    @cache_readonly
    def _can_hold_na(self) -> bool:
        """
        Can we store NA values in this Block?
        """
        dtype = self.dtype
        if isinstance(dtype, np.dtype):
            # bool/int/uint numpy dtypes have no NA representation
            return dtype.kind not in ["b", "i", "u"]
        # ExtensionDtype declares NA support itself
        return dtype._can_hold_na

179 

180 @final 

181 @property 

182 def is_bool(self) -> bool: 

183 """ 

184 We can be bool if a) we are bool dtype or b) object dtype with bool objects. 

185 """ 

186 return self.values.dtype == np.dtype(bool) 

187 

    @final
    def external_values(self):
        # Delegates to the module-level ``external_values`` helper (defined
        # elsewhere in this file).
        return external_values(self.values)

191 

    @final
    @cache_readonly
    def fill_value(self):
        # Used in reindex_indexer
        # NA value appropriate for this block's dtype (compat=False —
        # semantics live in pandas.core.dtypes.missing.na_value_for_dtype).
        return na_value_for_dtype(self.dtype, compat=False)

197 

198 @final 

199 def _standardize_fill_value(self, value): 

200 # if we are passed a scalar None, convert it here 

201 if self.dtype != _dtype_obj and is_valid_na_for_dtype(value, self.dtype): 

202 value = self.fill_value 

203 return value 

204 

    @property
    def mgr_locs(self) -> BlockPlacement:
        # Positions of this block's rows within the owning BlockManager.
        return self._mgr_locs

208 

    @mgr_locs.setter
    def mgr_locs(self, new_mgr_locs: BlockPlacement) -> None:
        # No validation here; the caller is responsible for consistency.
        self._mgr_locs = new_mgr_locs

212 

    @final
    def make_block(
        self, values, placement=None, refs: BlockValuesRefs | None = None
    ) -> Block:
        """
        Create a new block, inferring the block type from ``values``;
        propagate any attributes that are not specified.

        Parameters
        ----------
        values : np.ndarray or ExtensionArray
        placement : BlockPlacement, optional
            Defaults to this block's placement.
        refs : BlockValuesRefs, optional
            Copy-on-Write references to attach to the new block.
        """
        if placement is None:
            placement = self._mgr_locs
        if self.is_extension:
            values = ensure_block_shape(values, ndim=self.ndim)

        # TODO: perf by not going through new_block
        # We assume maybe_coerce_values has already been called
        return new_block(values, placement=placement, ndim=self.ndim, refs=refs)

229 

    @final
    def make_block_same_class(
        self,
        values,
        placement: BlockPlacement | None = None,
        refs: BlockValuesRefs | None = None,
    ) -> Block:
        """Wrap given values in a block of same type as self.

        Unlike ``make_block``, no block-type inference is performed.
        """
        # Pre-2.0 we called ensure_wrapped_if_datetimelike because fastparquet
        # relied on it, as of 2.0 the caller is responsible for this.
        if placement is None:
            placement = self._mgr_locs

        # We assume maybe_coerce_values has already been called
        return type(self)(values, placement=placement, ndim=self.ndim, refs=refs)

245 

246 @final 

247 def __repr__(self) -> str: 

248 # don't want to print out all of the items here 

249 name = type(self).__name__ 

250 if self.ndim == 1: 

251 result = f"{name}: {len(self)} dtype: {self.dtype}" 

252 else: 

253 shape = " x ".join([str(s) for s in self.shape]) 

254 result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}" 

255 

256 return result 

257 

    @final
    def __len__(self) -> int:
        # Length along the first axis of the underlying values.
        return len(self.values)

261 

    @final
    def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        # Note: the only place where we are called with ndarray[intp]
        # is from internals.concat, and we can verify that never happens
        # with 1-column blocks, i.e. never for ExtensionBlock.

        new_mgr_locs = self._mgr_locs[slicer]

        new_values = self._slice(slicer)
        # A slice produces a view -> propagate CoW refs; fancy (intp)
        # indexing copies, so no refs are shared in that case.
        refs = self.refs if isinstance(slicer, slice) else None
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)

278 

    @final
    def getitem_block_columns(
        self, slicer: slice, new_mgr_locs: BlockPlacement
    ) -> Block:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        new_values = self._slice(slicer)

        if new_values.ndim != self.values.ndim:
            raise ValueError("Only same dim slicing is allowed")

        # Slicing with a slice yields a view, so the new block shares refs.
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=self.refs)

294 

295 @final 

296 def _can_hold_element(self, element: Any) -> bool: 

297 """require the same dtype as ourselves""" 

298 element = extract_array(element, extract_numpy=True) 

299 return can_hold_element(self.values, element) 

300 

    @final
    def should_store(self, value: ArrayLike) -> bool:
        """
        Should we set self.values[indexer] = value inplace or do we need to cast?

        Parameters
        ----------
        value : np.ndarray or ExtensionArray

        Returns
        -------
        bool
        """
        # faster equivalent to is_dtype_equal(value.dtype, self.dtype)
        try:
            return value.dtype == self.dtype
        except TypeError:
            # some dtype comparisons raise; treat as "needs cast"
            return False

319 

320 # --------------------------------------------------------------------- 

321 # Apply/Reduce and Helpers 

322 

    @final
    def apply(self, func, **kwargs) -> list[Block]:
        """
        apply the function to my values; return a block if we are not
        one

        The result is rewrapped via ``_split_op_result`` (splitting 2D
        ExtensionArray results into 1D blocks where necessary).
        """
        result = func(self.values, **kwargs)

        return self._split_op_result(result)

332 

    @final
    def reduce(self, func) -> list[Block]:
        # We will apply the function and reshape the result into a single-row
        # Block with the same mgr_locs; squeezing will be done at a higher level
        assert self.ndim == 2

        result = func(self.values)

        if self.values.ndim == 1:
            # TODO(EA2D): special case not needed with 2D EAs
            # scalar result -> wrap as a 1x1 array
            res_values = np.array([[result]])
        else:
            res_values = result.reshape(-1, 1)

        nb = self.make_block(res_values)
        return [nb]

349 

    @final
    def _split_op_result(self, result: ArrayLike) -> list[Block]:
        # See also: split_and_operate
        # Wrap an operation result in block(s), splitting a 2D EA result
        # into per-column 1D blocks when the EA type is 1D-only.
        if result.ndim > 1 and isinstance(result.dtype, ExtensionDtype):
            # TODO(EA2D): unnecessary with 2D EAs
            # if we get a 2D ExtensionArray, we need to split it into 1D pieces
            nbs = []
            for i, loc in enumerate(self._mgr_locs):
                if not is_1d_only_ea_obj(result):
                    vals = result[i : i + 1]
                else:
                    vals = result[i]

                block = self.make_block(values=vals, placement=loc)
                nbs.append(block)
            return nbs

        nb = self.make_block(result)

        return [nb]

370 

371 @final 

372 def _split(self) -> list[Block]: 

373 """ 

374 Split a block into a list of single-column blocks. 

375 """ 

376 assert self.ndim == 2 

377 

378 new_blocks = [] 

379 for i, ref_loc in enumerate(self._mgr_locs): 

380 vals = self.values[slice(i, i + 1)] 

381 

382 bp = BlockPlacement(ref_loc) 

383 nb = type(self)(vals, placement=bp, ndim=2, refs=self.refs) 

384 new_blocks.append(nb) 

385 return new_blocks 

386 

387 @final 

388 def split_and_operate(self, func, *args, **kwargs) -> list[Block]: 

389 """ 

390 Split the block and apply func column-by-column. 

391 

392 Parameters 

393 ---------- 

394 func : Block method 

395 *args 

396 **kwargs 

397 

398 Returns 

399 ------- 

400 List[Block] 

401 """ 

402 assert self.ndim == 2 and self.shape[0] != 1 

403 

404 res_blocks = [] 

405 for nb in self._split(): 

406 rbs = func(nb, *args, **kwargs) 

407 res_blocks.extend(rbs) 

408 return res_blocks 

409 

410 # --------------------------------------------------------------------- 

411 # Up/Down-casting 

412 

    @final
    def coerce_to_target_dtype(self, other) -> Block:
        """
        coerce the current block to a dtype compat for other
        we will return a block, possibly object, and not raise

        we can also safely try to coerce to the same dtype
        and will receive the same block
        """
        # find_result_type picks a common dtype that can hold both
        new_dtype = find_result_type(self.values, other)

        return self.astype(new_dtype, copy=False)

425 

    @final
    def _maybe_downcast(
        self, blocks: list[Block], downcast=None, using_cow: bool = False
    ) -> list[Block]:
        """
        Possibly downcast the given result blocks.

        downcast=False disables downcasting entirely; object-dtype blocks
        are always soft-converted regardless of ``downcast``.
        """
        if downcast is False:
            return blocks

        if self.dtype == _dtype_obj:
            # TODO: does it matter that self.dtype might not match blocks[i].dtype?
            # GH#44241 We downcast regardless of the argument;
            # respecting 'downcast=None' may be worthwhile at some point,
            # but ATM it breaks too much existing code.
            # split and convert the blocks

            return extend_blocks(
                [blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
            )

        if downcast is None:
            return blocks

        return extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks])

448 

    @final
    @maybe_split
    def _downcast_2d(self, dtype, using_cow: bool = False) -> list[Block]:
        """
        downcast specialized to 2D case post-validation.

        Refactored to allow use of maybe_split.
        """
        new_values = maybe_downcast_to_dtype(self.values, dtype=dtype)
        # If downcasting was a no-op (same array returned), keep CoW refs.
        refs = self.refs if using_cow and new_values is self.values else None
        return [self.make_block(new_values, refs=refs)]

460 

461 def convert( 

462 self, 

463 *, 

464 copy: bool = True, 

465 using_cow: bool = False, 

466 ) -> list[Block]: 

467 """ 

468 attempt to coerce any object types to better types return a copy 

469 of the block (if copy = True) by definition we are not an ObjectBlock 

470 here! 

471 """ 

472 if not copy and using_cow: 

473 return [self.copy(deep=False)] 

474 return [self.copy()] if copy else [self] 

475 

476 # --------------------------------------------------------------------- 

477 # Array-Like Methods 

478 

    @cache_readonly
    def dtype(self) -> DtypeObj:
        # Cached: a Block's dtype is not expected to change in place.
        return self.values.dtype

482 

    @final
    def astype(
        self,
        dtype: DtypeObj,
        copy: bool = False,
        errors: IgnoreRaise = "raise",
        using_cow: bool = False,
    ) -> Block:
        """
        Coerce to the new dtype.

        Parameters
        ----------
        dtype : np.dtype or ExtensionDtype
        copy : bool, default False
            copy if indicated
        errors : str, {'raise', 'ignore'}, default 'raise'
            - ``raise`` : allow exceptions to be raised
            - ``ignore`` : suppress exceptions. On error return original object
        using_cow: bool, default False
            Signaling if copy on write copy logic is used.

        Returns
        -------
        Block

        Raises
        ------
        TypeError
            If the cast changed the block's shape.
        """
        values = self.values

        new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)

        new_values = maybe_coerce_values(new_values)

        refs = None
        # If the cast produced a view of the original data, the new block
        # must share CoW references with this one.
        if using_cow and astype_is_view(values.dtype, new_values.dtype):
            refs = self.refs

        newb = self.make_block(new_values, refs=refs)
        if newb.shape != self.shape:
            raise TypeError(
                f"cannot set astype for copy = [{copy}] for dtype "
                f"({self.dtype.name} [{self.shape}]) to different shape "
                f"({newb.dtype.name} [{newb.shape}])"
            )
        return newb

527 

    @final
    def to_native_types(self, na_rep: str = "nan", quoting=None, **kwargs) -> Block:
        """convert to our native types format"""
        # Delegates to the module-level to_native_types helper (defined
        # elsewhere in this file) and rewraps the result.
        result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs)
        return self.make_block(result)

533 

534 @final 

535 def copy(self, deep: bool = True) -> Block: 

536 """copy constructor""" 

537 values = self.values 

538 refs: BlockValuesRefs | None 

539 if deep: 

540 values = values.copy() 

541 refs = None 

542 else: 

543 refs = self.refs 

544 return type(self)(values, placement=self._mgr_locs, ndim=self.ndim, refs=refs) 

545 

546 # --------------------------------------------------------------------- 

547 # Replace 

548 

    @final
    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        # mask may be pre-computed if we're called from replace_list
        mask: npt.NDArray[np.bool_] | None = None,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        replace the to_replace value with value, possible to create new
        blocks here this is just a call to putmask.
        """

        # Note: the checks we do in NDFrame.replace ensure we never get
        # here with listlike to_replace or value, as those cases
        # go through replace_list
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            # GH49404
            if using_cow and (self.refs.has_reference() or not inplace):
                blk = self.copy()
            elif using_cow:
                blk = self.copy(deep=False)
            else:
                blk = self if inplace else self.copy()
            values = cast(Categorical, blk.values)
            values._replace(to_replace=to_replace, value=value, inplace=True)
            return [blk]

        if not self._can_hold_element(to_replace):
            # We cannot hold `to_replace`, so we know immediately that
            # replacing it is a no-op.
            # Note: If to_replace were a list, NDFrame.replace would call
            # replace_list instead of replace.
            if using_cow:
                return [self.copy(deep=False)]
            else:
                return [self] if inplace else [self.copy()]

        if mask is None:
            mask = missing.mask_missing(values, to_replace)
        if not mask.any():
            # Nothing to replace -> no-op.
            # Note: we get here with test_replace_extension_other incorrectly
            # bc _can_hold_element is incorrect.
            if using_cow:
                return [self.copy(deep=False)]
            else:
                return [self] if inplace else [self.copy()]

        elif self._can_hold_element(value):
            # Replacement value fits the current dtype: putmask in place
            # (on a copy where CoW / inplace semantics require one).
            # TODO(CoW): Maybe split here as well into columns where mask has True
            # and rest?
            if using_cow:
                if inplace:
                    blk = self.copy(deep=self.refs.has_reference())
                else:
                    blk = self.copy()
            else:
                blk = self if inplace else self.copy()
            putmask_inplace(blk.values, mask, value)
            if not (self.is_object and value is None):
                # if the user *explicitly* gave None, we keep None, otherwise
                # may downcast to NaN
                blocks = blk.convert(copy=False, using_cow=using_cow)
            else:
                blocks = [blk]
            return blocks

        elif self.ndim == 1 or self.shape[0] == 1:
            # Single column: upcast to a dtype that can hold `value`, then
            # retry the replace on the upcast block.
            if value is None or value is NA:
                blk = self.astype(np.dtype(object))
            else:
                blk = self.coerce_to_target_dtype(value)
            return blk.replace(
                to_replace=to_replace,
                value=value,
                inplace=True,
                mask=mask,
            )

        else:
            # split so that we only upcast where necessary
            blocks = []
            for i, nb in enumerate(self._split()):
                blocks.extend(
                    type(self).replace(
                        nb,
                        to_replace=to_replace,
                        value=value,
                        inplace=True,
                        mask=mask[i : i + 1],
                        using_cow=using_cow,
                    )
                )
            return blocks

648 

    @final
    def _replace_regex(
        self,
        to_replace,
        value,
        inplace: bool = False,
        mask=None,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        Replace elements by the given value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        inplace : bool, default False
            Perform inplace modification.
        mask : array-like of bool, optional
            True indicate corresponding element is ignored.
        using_cow: bool, default False
            Specifying if copy on write is enabled.

        Returns
        -------
        List[Block]
        """
        if not self._can_hold_element(to_replace):
            # i.e. only ObjectBlock, but could in principle include a
            # String ExtensionBlock
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        rx = re.compile(to_replace)

        if using_cow:
            if inplace and not self.refs.has_reference():
                # sole owner of the data -> safe to mutate in place
                refs = self.refs
                new_values = self.values
            else:
                refs = None
                new_values = self.values.copy()
        else:
            refs = None
            new_values = self.values if inplace else self.values.copy()

        replace_regex(new_values, rx, value, mask)

        block = self.make_block(new_values, refs=refs)
        return block.convert(copy=False, using_cow=using_cow)

702 

    @final
    def replace_list(
        self,
        src_list: Iterable[Any],
        dest_list: Sequence[Any],
        inplace: bool = False,
        regex: bool = False,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        See BlockManager.replace_list docstring.
        """
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            # GH49404
            if using_cow and inplace:
                blk = self.copy(deep=self.refs.has_reference())
            else:
                blk = self if inplace else self.copy()
            values = cast(Categorical, blk.values)
            values._replace(to_replace=src_list, value=dest_list, inplace=True)
            return [blk]

        # Exclude anything that we know we won't contain
        pairs = [
            (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
        ]
        if not len(pairs):
            if using_cow:
                return [self.copy(deep=False)]
            # shortcut, nothing to replace
            return [self] if inplace else [self.copy()]

        src_len = len(pairs) - 1

        if is_string_dtype(values.dtype):
            # Calculate the mask once, prior to the call of comp
            # in order to avoid repeating the same computations
            na_mask = ~isna(values)
            masks: Iterable[npt.NDArray[np.bool_]] = (
                extract_bool_array(
                    cast(
                        ArrayLike,
                        compare_or_regex_search(
                            values, s[0], regex=regex, mask=na_mask
                        ),
                    )
                )
                for s in pairs
            )
        else:
            # GH#38086 faster if we know we dont need to check for regex
            masks = (missing.mask_missing(values, s[0]) for s in pairs)
        # Materialize if inplace = True, since the masks can change
        # as we replace
        if inplace:
            masks = list(masks)

        if using_cow and inplace:
            # Don't set up refs here, otherwise we will think that we have
            # references when we check again later
            rb = [self]
        else:
            rb = [self if inplace else self.copy()]

        for i, ((src, dest), mask) in enumerate(zip(pairs, masks)):
            convert = i == src_len  # only convert once at the end
            new_rb: list[Block] = []

            # GH-39338: _replace_coerce can split a block into
            # single-column blocks, so track the index so we know
            # where to index into the mask
            for blk_num, blk in enumerate(rb):
                if len(rb) == 1:
                    m = mask
                else:
                    mib = mask
                    assert not isinstance(mib, bool)
                    m = mib[blk_num : blk_num + 1]

                # error: Argument "mask" to "_replace_coerce" of "Block" has
                # incompatible type "Union[ExtensionArray, ndarray[Any, Any], bool]";
                # expected "ndarray[Any, dtype[bool_]]"
                result = blk._replace_coerce(
                    to_replace=src,
                    value=dest,
                    mask=m,
                    inplace=inplace,
                    regex=regex,
                    using_cow=using_cow,
                )
                if convert and blk.is_object and not all(x is None for x in dest_list):
                    # GH#44498 avoid unwanted cast-back
                    result = extend_blocks(
                        [
                            b.convert(copy=True and not using_cow, using_cow=using_cow)
                            for b in result
                        ]
                    )
                new_rb.extend(result)
            rb = new_rb
        return rb

807 

    @final
    def _replace_coerce(
        self,
        to_replace,
        value,
        mask: npt.NDArray[np.bool_],
        inplace: bool = True,
        regex: bool = False,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        Replace value corresponding to the given boolean array with another
        value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        mask : np.ndarray[bool]
            True indicate corresponding element is ignored.
        inplace : bool, default True
            Perform inplace modification.
        regex : bool, default False
            If true, perform regular expression substitution.

        Returns
        -------
        List[Block]
        """
        if should_use_regex(regex, to_replace):
            return self._replace_regex(
                to_replace,
                value,
                inplace=inplace,
                mask=mask,
            )
        else:
            if value is None:
                # gh-45601, gh-45836, gh-46634
                if mask.any():
                    # explicit None -> cast to object so None is preserved
                    has_ref = self.refs.has_reference()
                    nb = self.astype(np.dtype(object), copy=False, using_cow=using_cow)
                    if (nb is self or using_cow) and not inplace:
                        nb = nb.copy()
                    elif inplace and has_ref and nb.refs.has_reference():
                        # no copy in astype and we had refs before
                        nb = nb.copy()
                    putmask_inplace(nb.values, mask, value)
                    return [nb]
                if using_cow:
                    return [self.copy(deep=False)]
                return [self] if inplace else [self.copy()]
            return self.replace(
                to_replace=to_replace,
                value=value,
                inplace=inplace,
                mask=mask,
                using_cow=using_cow,
            )

869 

870 # --------------------------------------------------------------------- 

871 # 2D Methods - Shared by NumpyBlock and NDArrayBackedExtensionBlock 

872 # but not ExtensionBlock 

873 

874 def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray: 

875 """ 

876 For compatibility with 1D-only ExtensionArrays. 

877 """ 

878 return arg 

879 

880 def _unwrap_setitem_indexer(self, indexer): 

881 """ 

882 For compatibility with 1D-only ExtensionArrays. 

883 """ 

884 return indexer 

885 

    # NB: this cannot be made cache_readonly because in mgr.set_values we pin
    # new .values that can have different shape GH#42631
    @property
    def shape(self) -> Shape:
        # Shape of the underlying values array.
        return self.values.shape

891 

    def iget(self, i: int | tuple[int, int] | tuple[slice, int]) -> np.ndarray:
        """Positional getter into self.values."""
        # In the case where we have a tuple[slice, int], the slice will always
        # be slice(None)
        # Note: only reached with self.ndim == 2
        # Invalid index type "Union[int, Tuple[int, int], Tuple[slice, int]]"
        # for "Union[ndarray[Any, Any], ExtensionArray]"; expected type
        # "Union[int, integer[Any]]"
        return self.values[i]  # type: ignore[index]

900 

    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ArrayLike:
        """return a slice of my values"""
        # May return a view (slice) or a copy (boolean/fancy indexing),
        # following numpy indexing semantics.
        return self.values[slicer]

907 

    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
        """
        Modify block values in-place with new item value.

        If copy=True, first copy the underlying values in place before modifying
        (for Copy-on-Write).

        Parameters
        ----------
        locs : positional indexer into self.values
        values : np.ndarray or ExtensionArray
        copy : bool, default False

        Notes
        -----
        `set_inplace` never creates a new array or new Block, whereas `setitem`
        _may_ create a new array and always creates a new Block.

        Caller is responsible for checking values.dtype == self.dtype.
        """
        if copy:
            self.values = self.values.copy()
        self.values[locs] = values

925 

    def take_nd(
        self,
        indexer: npt.NDArray[np.intp],
        axis: AxisInt,
        new_mgr_locs: BlockPlacement | None = None,
        fill_value=lib.no_default,
    ) -> Block:
        """
        Take values according to indexer and return them as a block.

        If ``fill_value`` is not given, this block's own fill value is used
        and no fill is performed (allow_fill=False).
        """
        values = self.values

        if fill_value is lib.no_default:
            fill_value = self.fill_value
            allow_fill = False
        else:
            allow_fill = True

        # Note: algos.take_nd has upcast logic similar to coerce_to_target_dtype
        new_values = algos.take_nd(
            values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
        )

        # Called from three places in managers, all of which satisfy
        # these assertions
        if isinstance(self, ExtensionBlock):
            # NB: in this case, the 'axis' kwarg will be ignored in the
            # algos.take_nd call above.
            assert not (self.ndim == 1 and new_mgr_locs is None)
        assert not (axis == 0 and new_mgr_locs is None)

        if new_mgr_locs is None:
            new_mgr_locs = self._mgr_locs

        if not is_dtype_equal(new_values.dtype, self.dtype):
            # take may have upcast the dtype -> re-infer the block type
            return self.make_block(new_values, new_mgr_locs)
        else:
            return self.make_block_same_class(new_values, new_mgr_locs)

964 

    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        """
        Return a list of unstacked blocks of self

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : int
            Only used in ExtensionBlock._unstack
        new_placement : np.ndarray[np.intp]
        needs_masking : np.ndarray[bool]

        Returns
        -------
        blocks : list of Block
            New blocks of unstacked values.
        mask : array-like of bool
            The mask of columns of `blocks` we should keep.
        """
        new_values, mask = unstacker.get_new_values(
            self.values.T, fill_value=fill_value
        )

        mask = mask.any(0)
        # TODO: in all tests we have mask.all(); can we rely on that?

        # Note: these next two lines ensure that
        # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        # which the calling function needs in order to pass verify_integrity=False
        # to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        bp = BlockPlacement(new_placement)
        blocks = [new_block_2d(new_values, placement=bp)]
        return blocks, mask

1008 

1009 # --------------------------------------------------------------------- 

1010 

    def setitem(self, indexer, value, using_cow: bool = False) -> Block:
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set
        using_cow: bool, default False
            Signaling if CoW is used.

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """

        value = self._standardize_fill_value(value)

        values = cast(np.ndarray, self.values)
        if self.ndim == 2:
            # work row-major so `indexer` addresses rows
            values = values.T

        # length checking
        check_setitem_lengths(indexer, value, values)

        value = extract_array(value, extract_numpy=True)
        try:
            casted = np_can_hold_element(values.dtype, value)
        except LossySetitemError:
            # current dtype cannot store value, coerce to common dtype
            nb = self.coerce_to_target_dtype(value)
            return nb.setitem(indexer, value)
        else:
            if self.dtype == _dtype_obj:
                # TODO: avoid having to construct values[indexer]
                vi = values[indexer]
                if lib.is_list_like(vi):
                    # checking lib.is_scalar here fails on
                    # test_iloc_setitem_custom_object
                    casted = setitem_datetimelike_compat(values, len(vi), casted)

            if using_cow and self.refs.has_reference():
                # shared data under CoW: mutate a private copy instead
                values = values.copy()
                self = self.make_block_same_class(
                    values.T if values.ndim == 2 else values
                )
            if isinstance(casted, np.ndarray) and casted.ndim == 1 and len(casted) == 1:
                # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
                casted = casted[0, ...]
            values[indexer] = casted
        return self

1069 

    def putmask(self, mask, new, using_cow: bool = False) -> list[Block]:
        """
        putmask the data to the block; it is possible that we may create a
        new dtype of block

        Return the resulting block(s).

        Parameters
        ----------
        mask : np.ndarray[bool], SparseArray[bool], or BooleanArray
        new : a ndarray/object
        using_cow: bool, default False

        Returns
        -------
        List[Block]
        """
        # Keep the unvalidated 2D mask around for the per-column split path.
        orig_mask = mask
        values = cast(np.ndarray, self.values)
        mask, noop = validate_putmask(values.T, mask)
        assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))

        if new is lib.no_default:
            new = self.fill_value

        new = self._standardize_fill_value(new)
        new = extract_array(new, extract_numpy=True)

        if noop:
            # Mask selects nothing; under CoW return a shallow copy so the
            # caller still gets fresh reference tracking.
            if using_cow:
                return [self.copy(deep=False)]
            return [self]

        try:
            # Raises LossySetitemError if `new` does not fit our dtype.
            casted = np_can_hold_element(values.dtype, new)

            if using_cow and self.refs.has_reference():
                # Do this here to avoid copying twice
                values = values.copy()
                self = self.make_block_same_class(values)

            putmask_without_repeat(values.T, mask, casted)
            if using_cow:
                return [self.copy(deep=False)]
            return [self]
        except LossySetitemError:
            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                if not is_list_like(new):
                    # using just new[indexer] can't save us the need to cast
                    return self.coerce_to_target_dtype(new).putmask(mask, new)
                else:
                    # Delegate to setitem, which handles the upcast per-element.
                    indexer = mask.nonzero()[0]
                    nb = self.setitem(indexer, new[indexer], using_cow=using_cow)
                    return [nb]

            else:
                is_array = isinstance(new, np.ndarray)

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = new
                    if is_array:
                        # we have a different value per-column
                        n = new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n, using_cow=using_cow)
                    res_blocks.extend(rbs)
                return res_blocks

1142 

    def where(
        self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False
    ) -> list[Block]:
        """
        evaluate the block; return result block(s) from the result

        Keeps values where ``cond`` is True and replaces with ``other``
        where it is False.

        Parameters
        ----------
        other : a ndarray/object
        cond : np.ndarray[bool], SparseArray[bool], or BooleanArray
        _downcast : str or None, default "infer"
            Private because we only specify it when calling from fillna.

        Returns
        -------
        List[Block]
        """
        assert cond.ndim == self.ndim
        assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame))

        transpose = self.ndim == 2

        cond = extract_bool_array(cond)

        # EABlocks override where
        values = cast(np.ndarray, self.values)
        orig_other = other
        if transpose:
            values = values.T

        # icond marks the positions to REPLACE (inverse of cond).
        icond, noop = validate_putmask(values, ~cond)
        if noop:
            # GH-39595: Always return a copy; short-circuit up/downcasting
            if using_cow:
                return [self.copy(deep=False)]
            return [self.copy()]

        if other is lib.no_default:
            other = self.fill_value

        other = self._standardize_fill_value(other)

        try:
            # try/except here is equivalent to a self._can_hold_element check,
            # but this gets us back 'casted' which we will re-use below;
            # without using 'casted', expressions.where may do unwanted upcasts.
            casted = np_can_hold_element(values.dtype, other)
        except (ValueError, TypeError, LossySetitemError):
            # we cannot coerce, return a compat dtype

            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                block = self.coerce_to_target_dtype(other)
                blocks = block.where(orig_other, cond, using_cow=using_cow)
                return self._maybe_downcast(
                    blocks, downcast=_downcast, using_cow=using_cow
                )

            else:
                # since _maybe_downcast would split blocks anyway, we
                # can avoid some potential upcast/downcast by splitting
                # on the front end.
                is_array = isinstance(other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    oth = other
                    if is_array:
                        # we have a different value per-column
                        oth = other[:, i : i + 1]

                    submask = cond[:, i : i + 1]
                    rbs = nb.where(
                        oth, submask, _downcast=_downcast, using_cow=using_cow
                    )
                    res_blocks.extend(rbs)
                return res_blocks

        else:
            other = casted
            alt = setitem_datetimelike_compat(values, icond.sum(), other)
            if alt is not other:
                if is_list_like(other) and len(other) < len(values):
                    # call np.where with other to get the appropriate ValueError
                    np.where(~icond, values, other)
                    raise NotImplementedError(
                        "This should not be reached; call to np.where above is "
                        "expected to raise ValueError. Please report a bug at "
                        "github.com/pandas-dev/pandas"
                    )
                result = values.copy()
                np.putmask(result, icond, alt)
            else:
                # By the time we get here, we should have all Series/Index
                # args extracted to ndarray
                if (
                    is_list_like(other)
                    and not isinstance(other, np.ndarray)
                    and len(other) == self.shape[-1]
                ):
                    # If we don't do this broadcasting here, then expressions.where
                    # will broadcast a 1D other to be row-like instead of
                    # column-like.
                    other = np.array(other).reshape(values.shape)
                    # If lengths don't match (or len(other)==1), we will raise
                    # inside expressions.where, see test_series_where

                # Note: expressions.where may upcast.
                result = expressions.where(~icond, values, other)
                # The np_can_hold_element check _should_ ensure that we always
                # have result.dtype == self.dtype here.

            if transpose:
                result = result.T

            return [self.make_block(result)]

1261 

    def fillna(
        self,
        value,
        limit: int | None = None,
        inplace: bool = False,
        downcast=None,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        fillna on the block with the value. If we fail, then convert to
        ObjectBlock and try again
        """
        # Caller is responsible for validating limit; if int it is strictly positive
        inplace = validate_bool_kwarg(inplace, "inplace")

        if not self._can_hold_na:
            # can short-circuit the isna call
            noop = True
        else:
            mask = isna(self.values)
            mask, noop = validate_putmask(self.values, mask)

        if noop:
            # we can't process the value, but nothing to do
            if inplace:
                if using_cow:
                    return [self.copy(deep=False)]
                # Arbitrarily imposing the convention that we ignore downcast
                # on no-op when inplace=True
                return [self]
            else:
                # GH#45423 consistent downcasting on no-ops.
                nb = self.copy(deep=not using_cow)
                nbs = nb._maybe_downcast([nb], downcast=downcast, using_cow=using_cow)
                return nbs

        if limit is not None:
            # Un-mark NA positions beyond the first `limit` per row/column so
            # they are left unfilled.
            mask[mask.cumsum(self.ndim - 1) > limit] = False

        if inplace:
            nbs = self.putmask(mask.T, value, using_cow=using_cow)
        else:
            # without _downcast, we would break
            # test_fillna_dtype_conversion_equiv_replace
            nbs = self.where(value, ~mask.T, _downcast=False)

        # Note: blk._maybe_downcast vs self._maybe_downcast(nbs)
        # makes a difference bc blk may have object dtype, which has
        # different behavior in _maybe_downcast.
        return extend_blocks(
            [
                blk._maybe_downcast([blk], downcast=downcast, using_cow=using_cow)
                for blk in nbs
            ]
        )

1317 

    def interpolate(
        self,
        *,
        method: FillnaOptions = "pad",
        axis: AxisInt = 0,
        index: Index | None = None,
        inplace: bool = False,
        limit: int | None = None,
        limit_direction: str = "forward",
        limit_area: str | None = None,
        fill_value: Any | None = None,
        downcast: str | None = None,
        using_cow: bool = False,
        **kwargs,
    ) -> list[Block]:
        """
        Interpolate missing values in the block, delegating the actual
        array work to missing.interpolate_array_2d.  Returns the resulting
        block(s), possibly downcast.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")

        if not self._can_hold_na:
            # If there are no NAs, then interpolate is a no-op
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        try:
            m = missing.clean_fill_method(method)
        except ValueError:
            # method is not a fill method (e.g. "linear"); m=None signals that
            m = None
        if m is None and self.dtype.kind != "f":
            # only deal with floats
            # bc we already checked that can_hold_na, we don't have int dtype here
            # test_interp_basic checks that we make a copy here
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        if self.is_object and self.ndim == 2 and self.shape[0] != 1 and axis == 0:
            # split improves performance in ndarray.copy()
            return self.split_and_operate(
                type(self).interpolate,
                method=method,
                axis=axis,
                index=index,
                inplace=inplace,
                limit=limit,
                limit_direction=limit_direction,
                limit_area=limit_area,
                fill_value=fill_value,
                downcast=downcast,
                **kwargs,
            )

        refs = None
        if inplace:
            if using_cow and self.refs.has_reference():
                # data is shared: must copy before mutating in place
                data = self.values.copy()
            else:
                data = self.values
                refs = self.refs
        else:
            data = self.values.copy()
        data = cast(np.ndarray, data)  # bc overridden by ExtensionBlock

        # Mutates `data` in place.
        missing.interpolate_array_2d(
            data,
            method=method,
            axis=axis,
            index=index,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            **kwargs,
        )

        nb = self.make_block_same_class(data, refs=refs)
        return nb._maybe_downcast([nb], downcast, using_cow)

1394 

1395 def diff(self, n: int, axis: AxisInt = 1) -> list[Block]: 

1396 """return block for the diff of the values""" 

1397 # only reached with ndim == 2 and axis == 1 

1398 new_values = algos.diff(self.values, n, axis=axis) 

1399 return [self.make_block(values=new_values)] 

1400 

1401 def shift( 

1402 self, periods: int, axis: AxisInt = 0, fill_value: Any = None 

1403 ) -> list[Block]: 

1404 """shift the block by periods, possibly upcast""" 

1405 # convert integer to float if necessary. need to do a lot more than 

1406 # that, handle boolean etc also 

1407 

1408 # Note: periods is never 0 here, as that is handled at the top of 

1409 # NDFrame.shift. If that ever changes, we can do a check for periods=0 

1410 # and possibly avoid coercing. 

1411 

1412 if not lib.is_scalar(fill_value) and self.dtype != _dtype_obj: 

1413 # with object dtype there is nothing to promote, and the user can 

1414 # pass pretty much any weird fill_value they like 

1415 # see test_shift_object_non_scalar_fill 

1416 raise ValueError("fill_value must be a scalar") 

1417 

1418 fill_value = self._standardize_fill_value(fill_value) 

1419 

1420 try: 

1421 # error: Argument 1 to "np_can_hold_element" has incompatible type 

1422 # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]" 

1423 casted = np_can_hold_element( 

1424 self.dtype, fill_value # type: ignore[arg-type] 

1425 ) 

1426 except LossySetitemError: 

1427 nb = self.coerce_to_target_dtype(fill_value) 

1428 return nb.shift(periods, axis=axis, fill_value=fill_value) 

1429 

1430 else: 

1431 values = cast(np.ndarray, self.values) 

1432 new_values = shift(values, periods, axis, casted) 

1433 return [self.make_block(new_values)] 

1434 

1435 @final 

1436 def quantile( 

1437 self, 

1438 qs: Index, # with dtype float64 

1439 interpolation: QuantileInterpolation = "linear", 

1440 axis: AxisInt = 0, 

1441 ) -> Block: 

1442 """ 

1443 compute the quantiles of the 

1444 

1445 Parameters 

1446 ---------- 

1447 qs : Index 

1448 The quantiles to be computed in float64. 

1449 interpolation : str, default 'linear' 

1450 Type of interpolation. 

1451 axis : int, default 0 

1452 Axis to compute. 

1453 

1454 Returns 

1455 ------- 

1456 Block 

1457 """ 

1458 # We should always have ndim == 2 because Series dispatches to DataFrame 

1459 assert self.ndim == 2 

1460 assert axis == 1 # only ever called this way 

1461 assert is_list_like(qs) # caller is responsible for this 

1462 

1463 result = quantile_compat(self.values, np.asarray(qs._values), interpolation) 

1464 # ensure_block_shape needed for cases where we start with EA and result 

1465 # is ndarray, e.g. IntegerArray, SparseArray 

1466 result = ensure_block_shape(result, ndim=2) 

1467 return new_block_2d(result, placement=self._mgr_locs) 

1468 

1469 def round(self, decimals: int, using_cow: bool = False) -> Block: 

1470 """ 

1471 Rounds the values. 

1472 If the block is not of an integer or float dtype, nothing happens. 

1473 This is consistent with DataFrame.round behavivor. 

1474 (Note: Series.round would raise) 

1475 

1476 Parameters 

1477 ---------- 

1478 decimals: int, 

1479 Number of decimal places to round to. 

1480 Caller is responsible for validating this 

1481 using_cow: bool, 

1482 Whether Copy on Write is enabled right now 

1483 """ 

1484 if not self.is_numeric or self.is_bool: 

1485 return self.copy(deep=not using_cow) 

1486 refs = None 

1487 # TODO: round only defined on BaseMaskedArray 

1488 # Series also does this, so would need to fix both places 

1489 # error: Item "ExtensionArray" of "Union[ndarray[Any, Any], ExtensionArray]" 

1490 # has no attribute "round" 

1491 values = self.values.round(decimals) # type: ignore[union-attr] 

1492 if values is self.values: 

1493 refs = self.refs 

1494 if not using_cow: 

1495 # Normally would need to do this before, but 

1496 # numpy only returns same array when round operation 

1497 # is no-op 

1498 # https://github.com/numpy/numpy/blob/486878b37fc7439a3b2b87747f50db9b62fea8eb/numpy/core/src/multiarray/calculation.c#L625-L636 

1499 values = values.copy() 

1500 return self.make_block_same_class(values, refs=refs) 

1501 

1502 # --------------------------------------------------------------------- 

1503 # Abstract Methods Overridden By EABackedBlock and NumpyBlock 

1504 

    def delete(self, loc) -> list[Block]:
        """Deletes the locs from the block.

        We split the block to avoid copying the underlying data. We create new
        blocks for every connected segment of the initial block that is not deleted.
        The new blocks point to the initial array.
        """
        if not is_list_like(loc):
            loc = [loc]

        if self.ndim == 1:
            # 1D case: np.delete copies, no splitting needed.
            values = cast(np.ndarray, self.values)
            values = np.delete(values, loc)
            mgr_locs = self._mgr_locs.delete(loc)
            return [type(self)(values, placement=mgr_locs, ndim=self.ndim)]

        if np.max(loc) >= self.values.shape[0]:
            raise IndexError

        # Add one out-of-bounds indexer as maximum to collect
        # all columns after our last indexer if any
        loc = np.concatenate([loc, [self.values.shape[0]]])
        mgr_locs_arr = self._mgr_locs.as_array
        new_blocks: list[Block] = []

        previous_loc = -1
        # TODO(CoW): This is tricky, if parent block goes out of scope
        # all split blocks are referencing each other even though they
        # don't share data
        refs = self.refs if self.refs.has_reference() else None
        for idx in loc:
            if idx == previous_loc + 1:
                # There is no column between current and last idx
                pass
            else:
                # Emit one block for the run of kept columns between the
                # previous deleted index and this one (views, not copies).
                # No overload variant of "__getitem__" of "ExtensionArray" matches
                # argument type "Tuple[slice, slice]"
                values = self.values[previous_loc + 1 : idx, :]  # type: ignore[call-overload] # noqa
                locs = mgr_locs_arr[previous_loc + 1 : idx]
                nb = type(self)(
                    values, placement=BlockPlacement(locs), ndim=self.ndim, refs=refs
                )
                new_blocks.append(nb)

            previous_loc = idx

        return new_blocks

1552 

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # Abstract: concrete subclasses (see EABackedBlock below) decide
        # whether their values may share memory with another array.
        raise AbstractMethodError(self)

1557 

    @property
    def array_values(self) -> ExtensionArray:
        """
        The array that Series.array returns. Always an ExtensionArray.
        """
        # Abstract: EABackedBlock returns self.values directly; the
        # ndarray-backed subclass presumably wraps its ndarray — confirm
        # in the part of this file below this chunk.
        raise AbstractMethodError(self)

1564 

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        return an internal format, currently just the ndarray
        this is often overridden to handle to_dense like operations
        """
        # Abstract; see EABackedBlock.get_values for the EA implementation.
        raise AbstractMethodError(self)

1571 

    def values_for_json(self) -> np.ndarray:
        """Return an ndarray representation suitable for JSON serialization."""
        # Abstract; see EABackedBlock.values_for_json for the EA implementation.
        raise AbstractMethodError(self)

1574 

1575 

class EABackedBlock(Block):
    """
    Mixin for Block subclasses backed by ExtensionArray.
    """

    values: ExtensionArray

    def setitem(self, indexer, value, using_cow: bool = False):
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        This differs from Block.setitem by not allowing setitem to change
        the dtype of the Block.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set
        using_cow: bool, default False
            Signaling if CoW is used.

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """
        # Keep originals so we can retry unchanged against an upcast block.
        orig_indexer = indexer
        orig_value = value

        indexer = self._unwrap_setitem_indexer(indexer)
        value = self._maybe_squeeze_arg(value)

        values = self.values
        if values.ndim == 2:
            # TODO(GH#45419): string[pyarrow] tests break if we transpose
            # unconditionally
            values = values.T
        check_setitem_lengths(indexer, value, values)

        try:
            # EA setitem mutates in place; on success we return self.
            values[indexer] = value
        except (ValueError, TypeError) as err:
            _catch_deprecated_value_error(err)

            if is_interval_dtype(self.dtype):
                # see TestSetitemFloatIntervalWithIntIntervalValues
                nb = self.coerce_to_target_dtype(orig_value)
                return nb.setitem(orig_indexer, orig_value)

            elif isinstance(self, NDArrayBackedExtensionBlock):
                nb = self.coerce_to_target_dtype(orig_value)
                return nb.setitem(orig_indexer, orig_value)

            else:
                # Other EAs are not upcast implicitly; surface the error.
                raise

        else:
            return self

    def where(
        self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False
    ) -> list[Block]:
        """
        Keep values where ``cond`` is True, replace with ``other`` elsewhere.
        Falls back to upcasting or per-column splitting when the EA cannot
        hold ``other``.
        """
        # _downcast private bc we only specify it when calling from fillna
        arr = self.values.T

        cond = extract_bool_array(cond)

        # Keep pre-squeeze versions for the retry/split paths below.
        orig_other = other
        orig_cond = cond
        other = self._maybe_squeeze_arg(other)
        cond = self._maybe_squeeze_arg(cond)

        if other is lib.no_default:
            other = self.fill_value

        icond, noop = validate_putmask(arr, ~cond)
        if noop:
            # GH#44181, GH#45135
            # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast
            if using_cow:
                return [self.copy(deep=False)]
            return [self.copy()]

        try:
            res_values = arr._where(cond, other).T
        except (ValueError, TypeError) as err:
            _catch_deprecated_value_error(err)

            if self.ndim == 1 or self.shape[0] == 1:
                if is_interval_dtype(self.dtype):
                    # TestSetitemFloatIntervalWithIntIntervalValues
                    blk = self.coerce_to_target_dtype(orig_other)
                    nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
                    return self._maybe_downcast(
                        nbs, downcast=_downcast, using_cow=using_cow
                    )

                elif isinstance(self, NDArrayBackedExtensionBlock):
                    # NB: not (yet) the same as
                    #  isinstance(values, NDArrayBackedExtensionArray)
                    blk = self.coerce_to_target_dtype(orig_other)
                    nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
                    return self._maybe_downcast(
                        nbs, downcast=_downcast, using_cow=using_cow
                    )

                else:
                    raise

            else:
                # Same pattern we use in Block.putmask
                is_array = isinstance(orig_other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = orig_other
                    if is_array:
                        # we have a different value per-column
                        n = orig_other[:, i : i + 1]

                    submask = orig_cond[:, i : i + 1]
                    rbs = nb.where(n, submask, using_cow=using_cow)
                    res_blocks.extend(rbs)
                return res_blocks

        nb = self.make_block_same_class(res_values)
        return [nb]

    def putmask(self, mask, new, using_cow: bool = False) -> list[Block]:
        """
        See Block.putmask.__doc__
        """
        mask = extract_bool_array(mask)
        if new is lib.no_default:
            new = self.fill_value

        values = self.values
        if values.ndim == 2:
            values = values.T

        # Keep originals for the retry/split paths below.
        orig_new = new
        orig_mask = mask
        new = self._maybe_squeeze_arg(new)
        mask = self._maybe_squeeze_arg(mask)

        if not mask.any():
            # No-op; under CoW return a shallow copy with fresh refs.
            if using_cow:
                return [self.copy(deep=False)]
            return [self]

        if using_cow and self.refs.has_reference():
            # Data is shared: copy before the in-place _putmask below.
            values = values.copy()
            self = self.make_block_same_class(  # type: ignore[assignment]
                values.T if values.ndim == 2 else values
            )

        try:
            # Caller is responsible for ensuring matching lengths
            values._putmask(mask, new)
        except (TypeError, ValueError) as err:
            _catch_deprecated_value_error(err)

            if self.ndim == 1 or self.shape[0] == 1:
                if is_interval_dtype(self.dtype):
                    # Discussion about what we want to support in the general
                    #  case GH#39584
                    blk = self.coerce_to_target_dtype(orig_new)
                    return blk.putmask(orig_mask, orig_new)

                elif isinstance(self, NDArrayBackedExtensionBlock):
                    # NB: not (yet) the same as
                    #  isinstance(values, NDArrayBackedExtensionArray)
                    blk = self.coerce_to_target_dtype(orig_new)
                    return blk.putmask(orig_mask, orig_new)

                else:
                    raise

            else:
                # Same pattern we use in Block.putmask
                is_array = isinstance(orig_new, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = orig_new
                    if is_array:
                        # we have a different value per-column
                        n = orig_new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n)
                    res_blocks.extend(rbs)
                return res_blocks

        return [self]

    def delete(self, loc) -> list[Block]:
        """Delete ``loc`` positions; see Block.delete for the 2D split logic."""
        # This will be unnecessary if/when __array_function__ is implemented
        if self.ndim == 1:
            values = self.values.delete(loc)
            mgr_locs = self._mgr_locs.delete(loc)
            return [type(self)(values, placement=mgr_locs, ndim=self.ndim)]
        elif self.values.ndim == 1:
            # We get here through to_stata
            return []
        return super().delete(loc)

    @cache_readonly
    def array_values(self) -> ExtensionArray:
        # Already an ExtensionArray; no wrapping needed.
        return self.values

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        return object dtype as boxed values, such as Timestamps/Timedelta
        """
        values: ArrayLike = self.values
        if dtype == _dtype_obj:
            values = values.astype(object)
        # TODO(EA2D): reshape not needed with 2D EAs
        return np.asarray(values).reshape(self.shape)

    def values_for_json(self) -> np.ndarray:
        """Materialize the EA as an ndarray for JSON serialization."""
        return np.asarray(self.values)

    def interpolate(
        self,
        *,
        method: FillnaOptions = "pad",
        axis: int = 0,
        inplace: bool = False,
        limit: int | None = None,
        fill_value=None,
        using_cow: bool = False,
        **kwargs,
    ):
        # EAs only support pad/backfill-style interpolation, implemented
        # via ExtensionArray.fillna.
        values = self.values
        if values.ndim == 2 and axis == 0:
            # NDArrayBackedExtensionArray.fillna assumes axis=1
            new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T
        else:
            new_values = values.fillna(value=fill_value, method=method, limit=limit)
        return self.make_block_same_class(new_values)

1826 

1827 

1828class ExtensionBlock(libinternals.Block, EABackedBlock): 

1829 """ 

1830 Block for holding extension types. 

1831 

1832 Notes 

1833 ----- 

1834 This holds all 3rd-party extension array types. It's also the immediate 

1835 parent class for our internal extension types' blocks. 

1836 

1837 ExtensionArrays are limited to 1-D. 

1838 """ 

1839 

1840 _can_consolidate = False 

1841 _validate_ndim = False 

1842 is_extension = True 

1843 

1844 values: ExtensionArray 

1845 

    def fillna(
        self,
        value,
        limit: int | None = None,
        inplace: bool = False,
        downcast=None,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        Fill NA values via ExtensionArray.fillna; IntervalDtype defers to
        Block.fillna, which handles the required dtype coercion.
        """
        if is_interval_dtype(self.dtype):
            # Block.fillna handles coercion (test_fillna_interval)
            return super().fillna(
                value=value,
                limit=limit,
                inplace=inplace,
                downcast=downcast,
                using_cow=using_cow,
            )
        if using_cow and self._can_hold_na and not self.values._hasna:
            # No NAs to fill: keep the existing array and its refs (lazy copy).
            refs = self.refs
            new_values = self.values
        else:
            refs = None
            new_values = self.values.fillna(value=value, method=None, limit=limit)
        nb = self.make_block_same_class(new_values, refs=refs)
        return nb._maybe_downcast([nb], downcast, using_cow=using_cow)

1871 

1872 @cache_readonly 

1873 def shape(self) -> Shape: 

1874 # TODO(EA2D): override unnecessary with 2D EAs 

1875 if self.ndim == 1: 

1876 return (len(self.values),) 

1877 return len(self._mgr_locs), len(self.values) 

1878 

    def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
        """
        Positional getter for this single-column EA block.  An int (or
        int-first tuple) must be 0; a (slice, int) tuple selects a row.
        """
        # In the case where we have a tuple[slice, int], the slice will always
        # be slice(None)
        # We _could_ make the annotation more specific, but mypy would
        # complain about override mismatch:
        #  Literal[0] | tuple[Literal[0], int] | tuple[slice, int]

        # Note: only reached with self.ndim == 2

        if isinstance(i, tuple):
            # TODO(EA2D): unnecessary with 2D EAs
            col, loc = i
            if not com.is_null_slice(col) and col != 0:
                raise IndexError(f"{self} only contains one item")
            if isinstance(col, slice):
                # the is_null_slice check above assures that col is slice(None)
                # so what we want is a view on all our columns and row loc
                if loc < 0:
                    loc += len(self.values)
                # Note: loc:loc+1 vs [[loc]] makes a difference when called
                # from fast_xs because we want to get a view back.
                return self.values[loc : loc + 1]
            return self.values[loc]
        else:
            if i != 0:
                raise IndexError(f"{self} only contains one item")
            return self.values

1906 

1907 def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None: 

1908 # When an ndarray, we should have locs.tolist() == [0] 

1909 # When a BlockPlacement we should have list(locs) == [0] 

1910 if copy: 

1911 self.values = self.values.copy() 

1912 self.values[:] = values 

1913 

1914 def _maybe_squeeze_arg(self, arg): 

1915 """ 

1916 If necessary, squeeze a (N, 1) ndarray to (N,) 

1917 """ 

1918 # e.g. if we are passed a 2D mask for putmask 

1919 if ( 

1920 isinstance(arg, (np.ndarray, ExtensionArray)) 

1921 and arg.ndim == self.values.ndim + 1 

1922 ): 

1923 # TODO(EA2D): unnecessary with 2D EAs 

1924 assert arg.shape[1] == 1 

1925 # error: No overload variant of "__getitem__" of "ExtensionArray" 

1926 # matches argument type "Tuple[slice, int]" 

1927 arg = arg[:, 0] # type: ignore[call-overload] 

1928 elif isinstance(arg, ABCDataFrame): 

1929 # 2022-01-06 only reached for setitem 

1930 # TODO: should we avoid getting here with DataFrame? 

1931 assert arg.shape[1] == 1 

1932 arg = arg._ixs(0, axis=1)._values 

1933 

1934 return arg 

1935 

    def _unwrap_setitem_indexer(self, indexer):
        """
        Adapt a 2D-indexer to our 1D values.

        This is intended for 'setitem', not 'iget' or '_slice'.
        """
        # TODO: ATM this doesn't work for iget/_slice, can we change that?

        if isinstance(indexer, tuple) and len(indexer) == 2:
            # TODO(EA2D): not needed with 2D EAs
            #  Should never have length > 2.  Caller is responsible for checking.
            #  Length 1 is reached vis setitem_single_block and setitem_single_column
            #  each of which pass indexer=(pi,)
            if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer):
                # GH#44703 went through indexing.maybe_convert_ix
                first, second = indexer
                if not (
                    second.size == 1 and (second == 0).all() and first.shape[1] == 1
                ):
                    raise NotImplementedError(
                        "This should not be reached. Please report a bug at "
                        "github.com/pandas-dev/pandas/"
                    )
                indexer = first[:, 0]

            elif lib.is_integer(indexer[1]) and indexer[1] == 0:
                # reached via setitem_single_block passing the whole indexer
                indexer = indexer[0]

            elif com.is_null_slice(indexer[1]):
                # column part is slice(None): select everything in our column
                indexer = indexer[0]

            elif is_list_like(indexer[1]) and indexer[1][0] == 0:
                indexer = indexer[0]

            else:
                # Any other column indexer would address a column we don't have.
                raise NotImplementedError(
                    "This should not be reached. Please report a bug at "
                    "github.com/pandas-dev/pandas/"
                )
        return indexer

1977 

    @property
    def is_view(self) -> bool:
        """Extension arrays are never treated as views."""
        # EA blocks are considered to own their data for the purposes of
        # the BlockManager's copy/consolidation logic.
        return False

1982 

    @cache_readonly
    def is_numeric(self):
        # Delegate to the ExtensionDtype's own notion of numeric-ness.
        return self.values.dtype._is_numeric

1986 

    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ExtensionArray:
        """
        Return a slice of my values.

        Parameters
        ----------
        slicer : slice, ndarray[int], or ndarray[bool]
            Valid (non-reducing) indexer for self.values.

        Returns
        -------
        ExtensionArray
        """
        # Notes: ndarray[bool] is only reachable when via getitem_mgr, which
        #  is only for Series, i.e. self.ndim == 1.

        # return same dims as we currently have
        if self.ndim == 2:
            # reached via getitem_block via _slice_take_blocks_ax0
            # TODO(EA2D): won't be necessary with 2D EAs

            if not isinstance(slicer, slice):
                raise AssertionError(
                    "invalid slicing for a 1-ndim ExtensionArray", slicer
                )
            # GH#32959 only full-slicers along fake-dim0 are valid
            # TODO(EA2D): won't be necessary with 2D EAs
            # range(1) instead of self._mgr_locs to avoid exception on [::-1]
            #  see test_iloc_getitem_slice_negative_step_ea_block
            new_locs = range(1)[slicer]
            if not len(new_locs):
                raise AssertionError(
                    "invalid slicing for a 1-ndim ExtensionArray", slicer
                )
            # The fake dim0 was fully selected; apply a full slice to dim1.
            slicer = slice(None)

        return self.values[slicer]

2026 

    @final
    def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
        """
        Perform __getitem__-like specialized to slicing along index.

        Parameters
        ----------
        slicer : slice
            Slice along the last (index) axis.

        Returns
        -------
        ExtensionBlock
            New block of the same type with unchanged placement; `refs`
            are propagated since the sliced values may share memory.
        """
        # GH#42787 in principle this is equivalent to values[..., slicer], but we don't
        # require subclasses of ExtensionArray to support that form (for now).
        new_values = self.values[slicer]
        return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)

2036 

    def diff(self, n: int, axis: AxisInt = 1) -> list[Block]:
        """
        1st discrete difference on the block's values.

        Parameters
        ----------
        n : int
            Number of periods to diff.
        axis : AxisInt, default 1
            Only reached with ndim == 2 and axis == 1; since the backing
            EA is 1-dimensional, the diff itself is taken with axis=0.

        Returns
        -------
        list containing a single new Block.
        """
        # only reached with ndim == 2 and axis == 1
        # TODO(EA2D): Can share with NDArrayBackedExtensionBlock
        new_values = algos.diff(self.values, n, axis=0)
        return [self.make_block(values=new_values)]

2042 

    def shift(
        self, periods: int, axis: AxisInt = 0, fill_value: Any = None
    ) -> list[Block]:
        """
        Shift the block by `periods`.

        Dispatches to underlying ExtensionArray and re-boxes in an
        ExtensionBlock.

        Parameters
        ----------
        periods : int
            Number of positions to shift by.
        axis : AxisInt, default 0
            Accepted for signature compatibility; the EA shift is always
            along its single dimension.
        fill_value : Any, default None
            Value to use for newly introduced positions.

        Returns
        -------
        list containing a single new Block of the same class.
        """
        new_values = self.values.shift(periods=periods, fill_value=fill_value)
        return [self.make_block_same_class(new_values)]

2054 

    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        """
        ExtensionArray-safe unstack.

        Parameters
        ----------
        unstacker : reshape helper exposing `arange_result`
        fill_value : scalar used for positions introduced by the unstack
        new_placement : np.ndarray[intp]
            Target placement for each resulting column.
        needs_masking : np.ndarray[bool]
            Per-column flag indicating whether any -1 (missing) indices
            are present, computed once by the caller.

        Returns
        -------
        tuple of (list of new Blocks, boolean mask of retained columns)
        """
        # We override ObjectBlock._unstack, which unstacks directly on the
        # values of the array. For EA-backed blocks, this would require
        # converting to a 2-D ndarray of objects.
        # Instead, we unstack an ndarray of integer positions, followed by
        # a `take` on the actual values.

        # Caller is responsible for ensuring self.shape[-1] == len(unstacker.index)
        new_values, mask = unstacker.arange_result

        # Note: these next two lines ensure that
        #  mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        #  which the calling function needs in order to pass verify_integrity=False
        #  to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        # needs_masking[i] calculated once in BlockManager.unstack tells
        #  us if there are any -1s in the relevant indices.  When False,
        #  that allows us to go through a faster path in 'take', among
        #  other things avoiding e.g. Categorical._validate_scalar.
        blocks = [
            # TODO: could cast to object depending on fill_value?
            type(self)(
                self.values.take(
                    indices, allow_fill=needs_masking[i], fill_value=fill_value
                ),
                BlockPlacement(place),
                ndim=2,
            )
            for i, (indices, place) in enumerate(zip(new_values, new_placement))
        ]
        return blocks, mask

2095 

2096 

class NumpyBlock(libinternals.NumpyBlock, Block):
    """
    Block backed by a plain np.ndarray.
    """

    values: np.ndarray

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # A non-None `base` means the ndarray shares its buffer with
        # another object.
        return self.values.base is not None

    @property
    def array_values(self) -> ExtensionArray:
        # Wrap in PandasArray so callers get a uniform ExtensionArray
        # interface regardless of block type.
        return PandasArray(self.values)

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        Return the underlying ndarray, cast to object dtype if requested.
        Any other ``dtype`` (including None) returns the values unchanged.
        """
        if dtype == _dtype_obj:
            return self.values.astype(_dtype_obj)
        return self.values

    def values_for_json(self) -> np.ndarray:
        # ndarray-backed values can be handed to the JSON writer as-is.
        return self.values

2116 

2117 

class NumericBlock(NumpyBlock):
    """
    NumpyBlock for numeric (float, complex, int, uint, bool) dtypes;
    see get_block_type, which routes dtype kinds "f", "c", "i", "u", "b" here.
    """

    __slots__ = ()
    # Class-level constant: numeric kinds never need a runtime dtype check.
    is_numeric = True

2121 

2122 

class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock):
    """
    Block backed by an NDArrayBackedExtensionArray (an ExtensionArray
    that stores its data in a plain ndarray, exposed as `_ndarray`).
    """

    values: NDArrayBackedExtensionArray

    # error: Signature of "is_extension" incompatible with supertype "Block"
    @cache_readonly
    def is_extension(self) -> bool:  # type: ignore[override]
        # i.e. datetime64tz, PeriodDtype
        return not isinstance(self.dtype, np.dtype)

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # check the ndarray values of the DatetimeIndex values
        return self.values._ndarray.base is not None

    def diff(self, n: int, axis: AxisInt = 0) -> list[Block]:
        """
        1st discrete difference.

        Parameters
        ----------
        n : int
            Number of periods to diff.
        axis : int, default 0
            Axis to diff upon.

        Returns
        -------
        A list with a new Block.

        Notes
        -----
        The arguments here are mimicking shift so they are called correctly
        by apply.
        """
        # only reached with ndim == 2 and axis == 1
        values = self.values

        # subtraction of the shifted array yields NaT/NA at the leading
        # positions, matching Series.diff semantics
        new_values = values - values.shift(n, axis=axis)
        return [self.make_block(new_values)]

    def shift(
        self, periods: int, axis: AxisInt = 0, fill_value: Any = None
    ) -> list[Block]:
        """
        Shift the values by `periods`, filling vacated positions with
        `fill_value`; returns a list with one new same-class Block.
        """
        values = self.values
        new_values = values.shift(periods, fill_value=fill_value, axis=axis)
        return [self.make_block_same_class(new_values)]

2174 

2175 

2176def _catch_deprecated_value_error(err: Exception) -> None: 

2177 """ 

2178 We catch ValueError for now, but only a specific one raised by DatetimeArray 

2179 which will no longer be raised in version.2.0. 

2180 """ 

2181 if isinstance(err, ValueError): 

2182 if isinstance(err, IncompatibleFrequency): 

2183 pass 

2184 elif "'value.closed' is" in str(err): 

2185 # IntervalDtype mismatched 'closed' 

2186 pass 

2187 

2188 

class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
    """Block for datetime64[ns], timedelta64[ns]."""

    __slots__ = ()
    is_numeric = False
    values: DatetimeArray | TimedeltaArray

    def values_for_json(self) -> np.ndarray:
        # Safe here because there is no timezone to preserve; contrast
        # DatetimeTZBlock, which overrides this back to the base version.
        return self.values._ndarray

    def interpolate(
        self,
        *,
        method: FillnaOptions = "pad",
        index: Index | None = None,
        axis: int = 0,
        inplace: bool = False,
        limit: int | None = None,
        fill_value=None,
        using_cow: bool = False,
        **kwargs,
    ):
        """
        Fill or interpolate missing values.

        For ``method="linear"`` the interpolation is done on the raw
        ``_ndarray`` (in place when allowed); any other method is
        dispatched to ``fillna`` on the underlying array.

        Returns
        -------
        Block of the same class.
        """
        values = self.values

        # error: Non-overlapping equality check (left operand type:
        # "Literal['backfill', 'bfill', 'ffill', 'pad']", right operand type:
        # "Literal['linear']")  [comparison-overlap]
        if method == "linear":  # type: ignore[comparison-overlap]
            # TODO: GH#50950 implement for arbitrary EAs
            refs = None
            if using_cow:
                # under Copy-on-Write we may only mutate in place when no
                # other object references these values
                if inplace and not self.refs.has_reference():
                    data_out = values._ndarray
                    refs = self.refs
                else:
                    data_out = values._ndarray.copy()
            else:
                data_out = values._ndarray if inplace else values._ndarray.copy()
            # mutates data_out in place
            missing.interpolate_array_2d(
                data_out, method=method, limit=limit, index=index, axis=axis
            )
            new_values = type(values)._simple_new(data_out, dtype=values.dtype)
            return self.make_block_same_class(new_values, refs=refs)

        elif values.ndim == 2 and axis == 0:
            # NDArrayBackedExtensionArray.fillna assumes axis=1
            new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T
        else:
            new_values = values.fillna(value=fill_value, method=method, limit=limit)
        return self.make_block_same_class(new_values)

2239 

2240 

class DatetimeTZBlock(DatetimeLikeBlock):
    """implement a datetime64 block with a tz attribute"""

    values: DatetimeArray

    __slots__ = ()
    # tz-aware dtype is an ExtensionDtype, not a np.dtype
    is_extension = True
    _validate_ndim = True
    # cannot be merged with other blocks into a consolidated 2D ndarray
    _can_consolidate = False

    # Don't use values_for_json from DatetimeLikeBlock since it is
    # an invalid optimization here(drop the tz)
    values_for_json = NDArrayBackedExtensionBlock.values_for_json

2254 

2255 

class ObjectBlock(NumpyBlock):
    """
    NumpyBlock for object dtype (the fallback in get_block_type).
    """

    __slots__ = ()
    is_object = True

    @maybe_split
    def convert(
        self,
        *,
        copy: bool = True,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        attempt to cast any object types to better types return a copy of
        the block (if copy = True) by definition we ARE an ObjectBlock!!!!!

        Parameters
        ----------
        copy : bool, default True
            Copy the values when the conversion left them unchanged.
        using_cow : bool, default False
            Copy-on-Write mode: share refs instead of copying where safe.

        Returns
        -------
        list containing a single new Block.
        """
        if self.dtype != _dtype_obj:
            # GH#50067 this should be impossible in ObjectBlock, but until
            # that is fixed, we short-circuit here.
            if using_cow:
                return [self.copy(deep=False)]
            return [self]

        values = self.values
        if values.ndim == 2:
            # maybe_split ensures we only get here with values.shape[0] == 1,
            # avoid doing .ravel as that might make a copy
            values = values[0]

        res_values = lib.maybe_convert_objects(
            values,
            convert_datetime=True,
            convert_timedelta=True,
            convert_period=True,
            convert_interval=True,
        )
        refs = None
        # identity check: maybe_convert_objects returns the input object
        # unchanged when no conversion was possible
        if copy and res_values is values:
            res_values = values.copy()
        elif res_values is values and using_cow:
            refs = self.refs

        res_values = ensure_block_shape(res_values, self.ndim)
        return [self.make_block(res_values, refs=refs)]

2299 

2300 

2301# ----------------------------------------------------------------- 

2302# Constructor Helpers 

2303 

2304 

def maybe_coerce_values(values: ArrayLike) -> ArrayLike:
    """
    Validate/normalize values before they are stored in a Block.

    datetime64/timedelta64 ndarrays are wrapped in their ExtensionArray
    counterparts (in nanoseconds), fixed-width string dtypes are converted
    to object dtype, and any cached `freq` is dropped.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    values : np.ndarray or ExtensionArray
    """
    # Caller is responsible for ensuring PandasArray is already extracted.
    if isinstance(values, np.ndarray):
        values = ensure_wrapped_if_datetimelike(values)

        if issubclass(values.dtype.type, str):
            # fixed-width unicode -> object; blocks never hold str dtypes
            values = np.array(values, dtype=object)

    is_dtlike = isinstance(values, (DatetimeArray, TimedeltaArray))
    if is_dtlike and values.freq is not None:
        # freq is only stored in DatetimeIndex/TimedeltaIndex,
        # not in Series/DataFrame
        values = values._with_freq(None)

    return values

2332 

2333 

def get_block_type(dtype: DtypeObj):
    """
    Find the appropriate Block subclass to use for the given values and dtype.

    Parameters
    ----------
    dtype : numpy or pandas dtype

    Returns
    -------
    cls : class, subclass of Block
    """
    cls: type[Block]

    # Extension dtypes first.  The Sparse check must precede the generic
    # ExtensionDtype check so that e.g. Sparse[datetime] stays sparse.
    if isinstance(dtype, SparseDtype):
        return ExtensionBlock
    if isinstance(dtype, DatetimeTZDtype):
        return DatetimeTZBlock
    if isinstance(dtype, PeriodDtype):
        return NDArrayBackedExtensionBlock
    if isinstance(dtype, ExtensionDtype):
        # Note: need to be sure PandasArray is unwrapped before we get here
        return ExtensionBlock

    # numpy dtypes: dispatch on kind, which is much more performant
    # than the is_foo_dtype helpers
    kind = dtype.kind
    if kind in "Mm":
        cls = DatetimeLikeBlock
    elif kind in "fciub":
        cls = NumericBlock
    else:
        cls = ObjectBlock
    return cls

2370 

2371 

def new_block_2d(
    values: ArrayLike, placement: BlockPlacement, refs: BlockValuesRefs | None = None
):
    """
    Fast-path Block constructor for the 2-D case.

    Specialization of ``new_block`` for callers that guarantee:
      * ndim == 2
      * ``placement`` is already a BlockPlacement
      * check_ndim / ensure_block_shape have already been applied
    """
    klass = get_block_type(values.dtype)

    coerced = maybe_coerce_values(values)
    return klass(coerced, ndim=2, placement=placement, refs=refs)

2383 

2384 

def new_block(
    values, placement, *, ndim: int, refs: BlockValuesRefs | None = None
) -> Block:
    """
    General Block constructor: normalizes placement, validates dimensions,
    then instantiates the dtype-appropriate Block subclass.

    Caller is responsible for ensuring ``values`` is NOT a PandasArray.
    """
    if not isinstance(placement, BlockPlacement):
        placement = BlockPlacement(placement)

    # raises ValueError on inconsistent ndim / placement length
    check_ndim(values, placement, ndim)

    klass = get_block_type(values.dtype)

    coerced = maybe_coerce_values(values)
    return klass(coerced, ndim=ndim, placement=placement, refs=refs)

2399 

2400 

def check_ndim(values, placement: BlockPlacement, ndim: int) -> None:
    """
    ndim inference and validation.

    Validates that values.ndim and ndim are consistent.
    Validates that len(values) and len(placement) are consistent.

    Parameters
    ----------
    values : array-like
    placement : BlockPlacement
    ndim : int

    Raises
    ------
    ValueError : the number of dimensions do not match
    """
    if values.ndim > ndim:
        # applies to np.ndarray and ExtensionArray alike
        raise ValueError(
            "Wrong number of dimensions. "
            f"values.ndim > ndim [{values.ndim} > {ndim}]"
        )

    if is_1d_only_ea_dtype(values.dtype):
        # TODO(EA2D): special case unnecessary with 2D EAs
        # a 1-dim-only EA in a 2-dim manager must cover exactly one row
        if ndim == 2 and len(placement) != 1:
            raise ValueError("need to split")
        return

    if values.ndim != ndim:
        raise ValueError(
            "Wrong number of dimensions. "
            f"values.ndim != ndim [{values.ndim} != {ndim}]"
        )
    if len(placement) != len(values):
        raise ValueError(
            f"Wrong number of items passed {len(values)}, "
            f"placement implies {len(placement)}"
        )

2441 

2442 

def extract_pandas_array(
    values: np.ndarray | ExtensionArray, dtype: DtypeObj | None, ndim: int
) -> tuple[np.ndarray | ExtensionArray, DtypeObj | None]:
    """
    Ensure that we don't allow PandasArray / PandasDtype in internals.

    Blocks should be backed by plain ndarrays when possible, so a
    PandasArray is unwrapped to its ndarray and a PandasDtype to its
    numpy dtype; anything else passes through unchanged.
    """
    if isinstance(values, ABCPandasArray):
        unwrapped = values.to_numpy()
        # TODO(EA2D): special case not needed with 2D EAs
        values = np.atleast_2d(unwrapped) if ndim and ndim > 1 else unwrapped

    if isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype

    return values, dtype

2460 

2461 

2462# ----------------------------------------------------------------- 

2463 

2464 

def extend_blocks(result, blocks=None) -> list[Block]:
    """return a new extended blocks, given the result"""
    if blocks is None:
        blocks = []

    if not isinstance(result, list):
        # a bare result must be a single Block
        assert isinstance(result, Block), type(result)
        blocks.append(result)
        return blocks

    # flatten one level of nesting
    for item in result:
        if isinstance(item, list):
            blocks.extend(item)
        else:
            blocks.append(item)
    return blocks

2479 

2480 

def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
    """
    Reshape if possible to have values.ndim == ndim.

    Values already at (or above) the requested ndim, and 1-dim-only
    ExtensionArrays, are returned unchanged.
    """
    if values.ndim >= ndim:
        return values

    if is_1d_only_ea_dtype(values.dtype):
        # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023
        # block.shape is incorrect for "2D" ExtensionArrays
        # We can't, and don't need to, reshape.
        return values

    values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values)
    return values.reshape(1, -1)

2495 

2496 

def to_native_types(
    values: ArrayLike,
    *,
    na_rep: str = "nan",
    quoting=None,
    float_format=None,
    decimal: str = ".",
    **kwargs,
) -> np.ndarray:
    """
    convert to our native types format

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    na_rep : str, default "nan"
        String representation used for missing values.
    quoting : optional
        csv quoting setting; when falsy, float/str fast paths are used.
    float_format : optional
        When given (or decimal != "."), floats go through FloatArrayFormatter.
    decimal : str, default "."
    **kwargs
        Passed through to `_format_native_types` for datetimelike values.

    Returns
    -------
    np.ndarray of object dtype (except the non-quoting float/str fast paths,
    which return str dtype before the final object cast).
    """
    if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
        # GH#40754 Convert categorical datetimes to datetime array
        values = algos.take_nd(
            values.categories._values,
            ensure_platform_int(values._codes),
            fill_value=na_rep,
        )

    values = ensure_wrapped_if_datetimelike(values)

    if isinstance(values, (DatetimeArray, TimedeltaArray)):
        if values.ndim == 1:
            result = values._format_native_types(na_rep=na_rep, **kwargs)
            result = result.astype(object, copy=False)
            return result

        # GH#21734 Process every column separately, they might have different formats
        results_converted = []
        for i in range(len(values)):
            result = values[i, :]._format_native_types(na_rep=na_rep, **kwargs)
            results_converted.append(result.astype(object, copy=False))
        return np.vstack(results_converted)

    elif values.dtype.kind == "f" and not is_sparse(values):
        # see GH#13418: no special formatting is desired at the
        # output (important for appropriate 'quoting' behaviour),
        # so do not pass it through the FloatArrayFormatter
        if float_format is None and decimal == ".":
            mask = isna(values)

            if not quoting:
                values = values.astype(str)
            else:
                values = np.array(values, dtype="object")

            values[mask] = na_rep
            values = values.astype(object, copy=False)
            return values

        from pandas.io.formats.format import FloatArrayFormatter

        formatter = FloatArrayFormatter(
            values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        )
        res = formatter.get_result_as_array()
        res = res.astype(object, copy=False)
        return res

    elif isinstance(values, ExtensionArray):
        mask = isna(values)

        # cast to object first so na_rep can be assigned in place
        new_values = np.asarray(values.astype(object))
        new_values[mask] = na_rep
        return new_values

    else:
        mask = isna(values)
        itemsize = writers.word_len(na_rep)

        if values.dtype != _dtype_obj and not quoting and itemsize:
            values = values.astype(str)
            if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
                # enlarge for the na_rep
                values = values.astype(f"<U{itemsize}")
        else:
            values = np.array(values, dtype="object")

        values[mask] = na_rep
        values = values.astype(object, copy=False)
        return values

2582 

2583 

def external_values(values: ArrayLike) -> ArrayLike:
    """
    The array that Series.values returns (public attribute).

    This has some historical constraints, and is overridden in block
    subclasses to return the correct array (e.g. period returns
    object ndarray and datetimetz a datetime64[ns] ndarray instead of
    proper extension array).

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    np.ndarray or ExtensionArray
        Under Copy-on-Write, returned ndarrays are read-only views.
    """
    if isinstance(values, (PeriodArray, IntervalArray)):
        return values.astype(object)
    elif isinstance(values, (DatetimeArray, TimedeltaArray)):
        # NB: for datetime64tz this is different from np.asarray(values), since
        #  that returns an object-dtype ndarray of Timestamps.
        # Avoid raising in .astype in casting from dt64tz to dt64
        values = values._ndarray

    if isinstance(values, np.ndarray) and using_copy_on_write():
        # hand out a locked view so callers cannot mutate the block's
        # data behind the manager's back
        values = values.view()
        values.flags.writeable = False

    # TODO(CoW) we should also mark our ExtensionArrays as read-only

    return values